mirror of https://github.com/milvus-io/milvus.git
feat: introduce third-party milvus-storage (#39418)
related: https://github.com/milvus-io/milvus/issues/39173 Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>pull/39586/head
parent
f32830e016
commit
c4ae9f4ece
|
@ -307,7 +307,8 @@ ${CMAKE_EXTRA_ARGS} \
|
|||
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \
|
||||
-DCPU_ARCH=${CPU_ARCH} \
|
||||
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} "
|
||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \
|
||||
-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} "
|
||||
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
||||
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
||||
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
||||
|
|
18
go.mod
18
go.mod
|
@ -18,7 +18,7 @@ require (
|
|||
github.com/gin-gonic/gin v1.9.1
|
||||
github.com/go-playground/validator/v10 v10.14.0
|
||||
github.com/gofrs/flock v0.8.1
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/protobuf v1.5.4
|
||||
github.com/google/btree v1.1.2
|
||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
|
||||
github.com/klauspost/compress v1.17.9
|
||||
|
@ -101,9 +101,9 @@ require (
|
|||
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect
|
||||
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
|
||||
github.com/alibabacloud-go/tea v1.1.8 // indirect
|
||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
||||
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||
github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 // indirect
|
||||
github.com/apache/thrift v0.18.1 // indirect
|
||||
github.com/apache/thrift v0.19.0 // indirect
|
||||
github.com/ardielle/ardielle-go v1.5.2 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect
|
||||
|
@ -158,7 +158,7 @@ require (
|
|||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
github.com/golang/mock v1.6.0 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/flatbuffers v2.0.8+incompatible // indirect
|
||||
github.com/google/flatbuffers v24.3.25+incompatible // indirect
|
||||
github.com/google/s2a-go v0.1.7 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
||||
|
@ -205,7 +205,7 @@ require (
|
|||
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
|
||||
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect
|
||||
github.com/pierrec/lz4 v2.5.2+incompatible // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.18 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.21 // indirect
|
||||
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect
|
||||
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect
|
||||
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect
|
||||
|
@ -260,13 +260,13 @@ require (
|
|||
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
|
||||
go.uber.org/automaxprocs v1.5.3 // indirect
|
||||
golang.org/x/arch v0.3.0 // indirect
|
||||
golang.org/x/mod v0.17.0 // indirect
|
||||
golang.org/x/mod v0.18.0 // indirect
|
||||
golang.org/x/sys v0.28.0 // indirect
|
||||
golang.org/x/term v0.27.0 // indirect
|
||||
golang.org/x/time v0.5.0 // indirect
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
|
||||
gonum.org/v1/gonum v0.11.0 // indirect
|
||||
golang.org/x/tools v0.22.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
|
||||
gonum.org/v1/gonum v0.14.0 // indirect
|
||||
google.golang.org/genproto v0.0.0-20240624140628-dc46fd24d27d // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect
|
||||
|
|
32
go.sum
32
go.sum
|
@ -104,14 +104,14 @@ github.com/alibabacloud-go/tea v1.1.8 h1:vFF0707fqjGiQTxrtMnIXRjOCvQXf49CuDVRtTo
|
|||
github.com/alibabacloud-go/tea v1.1.8/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
|
||||
github.com/aliyun/credentials-go v1.2.7 h1:gLtFylxLZ1TWi1pStIt1O6a53GFU1zkNwjtJir2B4ow=
|
||||
github.com/aliyun/credentials-go v1.2.7/go.mod h1:/KowD1cfGSLrLsH28Jr8W+xwoId0ywIy5lNzDz6O1vw=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
|
||||
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
|
||||
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q=
|
||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
|
||||
github.com/apache/thrift v0.18.1 h1:lNhK/1nqjbwbiOPDBPFJVKxgDEGSepKuTh6OLiXW8kg=
|
||||
github.com/apache/thrift v0.18.1/go.mod h1:rdQn/dCcDKEWjjylUeueum4vQEjG2v8v2PqriUnbr+I=
|
||||
github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk=
|
||||
github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I=
|
||||
github.com/ardielle/ardielle-go v1.5.2 h1:TilHTpHIQJ27R1Tl/iITBzMwiUGSlVfiVhwDNGM3Zj4=
|
||||
github.com/ardielle/ardielle-go v1.5.2/go.mod h1:I4hy1n795cUhaVt/ojz83SNVCYIGsAFAONtv2Dr7HUI=
|
||||
github.com/ardielle/ardielle-tools v1.5.4/go.mod h1:oZN+JRMnqGiIhrzkRN9l26Cej9dEx4jeNG6A+AdkShk=
|
||||
|
@ -427,8 +427,8 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ
|
|||
github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA=
|
||||
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
||||
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM=
|
||||
github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
|
||||
github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||
github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
|
@ -742,8 +742,8 @@ github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2
|
|||
github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
|
||||
github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI=
|
||||
github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
|
||||
github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
||||
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
||||
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4=
|
||||
|
@ -1122,8 +1122,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|||
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
|
||||
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
|
@ -1367,19 +1367,19 @@ golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
|||
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
|
||||
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
|
||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
|
||||
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
|
||||
gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
|
||||
gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E=
|
||||
gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA=
|
||||
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
||||
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
|
||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
||||
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
||||
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
|
||||
|
|
|
@ -13,7 +13,7 @@ class MilvusConan(ConanFile):
|
|||
"lz4/1.9.4#c5afb86edd69ac0df30e3a9e192e43db",
|
||||
"snappy/1.1.9#0519333fef284acd04806243de7d3070",
|
||||
"lzo/2.10#9517fc1bcc4d4cc229a79806003a1baa",
|
||||
"arrow/15.0.0#0456d916ff25d509e0724c5b219b4c45",
|
||||
"arrow/17.0.0#8cea917a6e06ca17c28411966d6fcdd7",
|
||||
"openssl/3.1.2#02594c4c0a6e2b4feb3cd15119993597",
|
||||
"aws-sdk-cpp/1.9.234#28d6d2c175975900ce292bafe8022c88",
|
||||
"googleapis/cci.20221108#65604e1b3b9a6b363044da625b201a2a",
|
||||
|
@ -72,6 +72,7 @@ class MilvusConan(ConanFile):
|
|||
"aws-sdk-cpp:transfer": False,
|
||||
"gtest:build_gmock": False,
|
||||
"boost:without_locale": False,
|
||||
"boost:without_test": True,
|
||||
"glog:with_gflags": True,
|
||||
"glog:shared": True,
|
||||
"prometheus-cpp:with_pull": False,
|
||||
|
|
|
@ -32,6 +32,7 @@ include_directories(
|
|||
${SIMDJSON_INCLUDE_DIR}
|
||||
${TANTIVY_INCLUDE_DIR}
|
||||
${CONAN_INCLUDE_DIRS}
|
||||
${MILVUS_STORAGE_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
add_subdirectory( pb )
|
||||
|
@ -73,6 +74,7 @@ set(LINK_TARGETS
|
|||
simdjson
|
||||
tantivy_binding
|
||||
knowhere
|
||||
milvus-storage
|
||||
${OpenMP_CXX_FLAGS}
|
||||
${CONAN_LIBS})
|
||||
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "segcore/packed_reader_c.h"
|
||||
#include "milvus-storage/packed/reader.h"
|
||||
#include "milvus-storage/common/log.h"
|
||||
#include "milvus-storage/filesystem/fs.h"
|
||||
#include "milvus-storage/common/config.h"
|
||||
|
||||
#include <arrow/c/bridge.h>
|
||||
#include <arrow/filesystem/filesystem.h>
|
||||
#include <arrow/status.h>
|
||||
#include <memory>
|
||||
|
||||
int
|
||||
NewPackedReader(const char* path,
|
||||
struct ArrowSchema* schema,
|
||||
const int64_t buffer_size,
|
||||
CPackedReader* c_packed_reader) {
|
||||
try {
|
||||
auto truePath = std::string(path);
|
||||
auto factory = std::make_shared<milvus_storage::FileSystemFactory>();
|
||||
auto conf = milvus_storage::StorageConfig();
|
||||
conf.uri = "file:///tmp/";
|
||||
auto trueFs = factory->BuildFileSystem(conf, &truePath).value();
|
||||
auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
|
||||
std::set<int> needed_columns;
|
||||
for (int i = 0; i < trueSchema->num_fields(); i++) {
|
||||
needed_columns.emplace(i);
|
||||
}
|
||||
auto reader = std::make_unique<milvus_storage::PackedRecordBatchReader>(
|
||||
*trueFs, path, trueSchema, needed_columns, buffer_size);
|
||||
*c_packed_reader = reader.release();
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
ReadNext(CPackedReader c_packed_reader,
|
||||
CArrowArray* out_array,
|
||||
CArrowSchema* out_schema) {
|
||||
try {
|
||||
auto packed_reader =
|
||||
static_cast<milvus_storage::PackedRecordBatchReader*>(
|
||||
c_packed_reader);
|
||||
std::shared_ptr<arrow::RecordBatch> record_batch;
|
||||
auto status = packed_reader->ReadNext(&record_batch);
|
||||
if (!status.ok()) {
|
||||
return -1;
|
||||
}
|
||||
if (record_batch == nullptr) {
|
||||
// end of file
|
||||
return 0;
|
||||
} else {
|
||||
std::unique_ptr<ArrowArray> arr = std::make_unique<ArrowArray>();
|
||||
std::unique_ptr<ArrowSchema> schema =
|
||||
std::make_unique<ArrowSchema>();
|
||||
auto status = arrow::ExportRecordBatch(
|
||||
*record_batch, arr.get(), schema.get());
|
||||
if (!status.ok()) {
|
||||
return -1;
|
||||
}
|
||||
*out_array = arr.release();
|
||||
*out_schema = schema.release();
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
CloseReader(CPackedReader c_packed_reader) {
|
||||
try {
|
||||
auto packed_reader =
|
||||
static_cast<milvus_storage::PackedRecordBatchReader*>(
|
||||
c_packed_reader);
|
||||
delete packed_reader;
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <arrow/c/abi.h>
|
||||
|
||||
/* Opaque handle to a packed reader created by NewPackedReader. */
typedef void* CPackedReader;
/* Opaque pointer to an exported Arrow C-data `struct ArrowArray`. */
typedef void* CArrowArray;
/* Opaque pointer to an exported Arrow C-data `struct ArrowSchema`. */
typedef void* CArrowSchema;

/**
 * @brief Open a packed reader to read needed columns in the specified path.
 *
 * @param path The root path of the packed files to read.
 * @param schema The original schema of data.
 * @param buffer_size The max buffer size of the packed reader.
 * @param c_packed_reader The output pointer of the packed reader.
 * @return 0 on success, -1 on failure.
 */
int
NewPackedReader(const char* path,
                struct ArrowSchema* schema,
                const int64_t buffer_size,
                CPackedReader* c_packed_reader);

/**
 * @brief Read the next record batch from the packed reader.
 * By default, the maximum return batch is 1024 rows.
 *
 * At end of stream the call returns 0 without producing a batch; callers
 * should initialise *out_array and *out_schema to NULL before the call so
 * that "no batch" can be recognised afterwards.
 *
 * @param c_packed_reader The packed reader to read.
 * @param out_array The output pointer of the arrow array.
 * @param out_schema The output pointer of the arrow schema.
 * @return 0 on success or end of stream, -1 on failure.
 */
int
ReadNext(CPackedReader c_packed_reader,
         CArrowArray* out_array,
         CArrowSchema* out_schema);

/**
 * @brief Close the packed reader and release the resources.
 *
 * @param c_packed_reader The packed reader to close.
 * @return 0 on success, -1 on failure.
 */
int
CloseReader(CPackedReader c_packed_reader);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "segcore/packed_writer_c.h"
|
||||
#include "milvus-storage/packed/writer.h"
|
||||
#include "milvus-storage/common/log.h"
|
||||
#include "milvus-storage/common/config.h"
|
||||
#include "milvus-storage/filesystem/fs.h"
|
||||
|
||||
#include <arrow/c/bridge.h>
|
||||
#include <arrow/filesystem/filesystem.h>
|
||||
|
||||
int
|
||||
NewPackedWriter(const char* path,
|
||||
struct ArrowSchema* schema,
|
||||
const int64_t buffer_size,
|
||||
CPackedWriter* c_packed_writer) {
|
||||
try {
|
||||
auto truePath = std::string(path);
|
||||
auto factory = std::make_shared<milvus_storage::FileSystemFactory>();
|
||||
auto conf = milvus_storage::StorageConfig();
|
||||
conf.uri = "file:///tmp/";
|
||||
auto trueFs = factory->BuildFileSystem(conf, &truePath).value();
|
||||
auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
|
||||
auto writer = std::make_unique<milvus_storage::PackedRecordBatchWriter>(
|
||||
buffer_size, trueSchema, trueFs, truePath, conf);
|
||||
|
||||
*c_packed_writer = writer.release();
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
WriteRecordBatch(CPackedWriter c_packed_writer,
|
||||
struct ArrowArray* array,
|
||||
struct ArrowSchema* schema) {
|
||||
try {
|
||||
auto packed_writer =
|
||||
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
||||
c_packed_writer);
|
||||
auto record_batch =
|
||||
arrow::ImportRecordBatch(array, schema).ValueOrDie();
|
||||
auto status = packed_writer->Write(record_batch);
|
||||
if (!status.ok()) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
CloseWriter(CPackedWriter c_packed_writer) {
|
||||
try {
|
||||
auto packed_writer =
|
||||
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
||||
c_packed_writer);
|
||||
auto status = packed_writer->Close();
|
||||
delete packed_writer;
|
||||
if (!status.ok()) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
} catch (std::exception& e) {
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <arrow/c/abi.h>
|
||||
|
||||
/* Opaque handle to a packed writer created by NewPackedWriter. */
typedef void* CPackedWriter;

/**
 * @brief Open a packed writer that writes record batches under the given path.
 *
 * @param path The path the packed files are written to.
 * @param schema The Arrow schema of the data to write.
 * @param buffer_size The buffer size passed to the packed writer.
 * @param c_packed_writer The output pointer of the packed writer.
 * @return 0 on success, -1 on failure.
 */
int
NewPackedWriter(const char* path,
                struct ArrowSchema* schema,
                const int64_t buffer_size,
                CPackedWriter* c_packed_writer);

/**
 * @brief Write one record batch through the packed writer.
 *
 * @param c_packed_writer The packed writer to write with.
 * @param array The Arrow C-data array holding the record batch.
 * @param schema The Arrow C-data schema describing `array`.
 * @return 0 on success, -1 on failure.
 */
int
WriteRecordBatch(CPackedWriter c_packed_writer,
                 struct ArrowArray* array,
                 struct ArrowSchema* schema);

/**
 * @brief Close the packed writer and release the resources.
 *
 * @param c_packed_writer The packed writer to close.
 * @return 0 on success, -1 on failure.
 */
int
CloseWriter(CPackedWriter c_packed_writer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -43,7 +43,7 @@ PayloadReader::init(std::shared_ptr<arrow::io::BufferReader> input,
|
|||
// Configure general Parquet reader settings
|
||||
auto reader_properties = parquet::ReaderProperties(pool);
|
||||
reader_properties.set_buffer_size(4096 * 4);
|
||||
reader_properties.enable_buffered_stream();
|
||||
// reader_properties.enable_buffered_stream();
|
||||
|
||||
// Configure Arrow-specific Parquet reader settings
|
||||
auto arrow_reader_props = parquet::ArrowReaderProperties();
|
||||
|
|
|
@ -45,3 +45,4 @@ if (LINUX)
|
|||
add_subdirectory(jemalloc)
|
||||
endif()
|
||||
|
||||
add_subdirectory(milvus-storage)
|
|
@ -0,0 +1,51 @@
|
|||
#-------------------------------------------------------------------------------
|
||||
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Fetches milvus-storage from git at a pinned revision and adds its `cpp`
# sub-project to the build, exporting MILVUS_STORAGE_INCLUDE_DIR for consumers.

# Update milvus-storage_VERSION for the first occurrence
milvus_add_pkg_config("milvus-storage")
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
# Revision passed to GIT_TAG below (presumably a short commit hash - verify).
set( milvus-storage_VERSION 7475494 )
set( GIT_REPOSITORY  "https://github.com/milvus-io/milvus-storage.git")
message(STATUS "milvus-storage repo: ${GIT_REPOSITORY}")
message(STATUS "milvus-storage version: ${milvus-storage_VERSION}")

# Use the variable that is actually defined above; the previously referenced
# milvus-storage_SOURCE_VER is never set, so the message printed an empty version.
message(STATUS "Building milvus-storage-${milvus-storage_VERSION} from source")
message(STATUS ${CMAKE_BUILD_TYPE})

# Forward the top-level ENABLE_AZURE_FS switch to the sub-project's
# WITH_AZURE_FS cache option.
if ( ENABLE_AZURE_FS STREQUAL "ON" )
    set(WITH_AZURE_FS ON CACHE BOOL "" FORCE )
else ()
    set(WITH_AZURE_FS OFF CACHE BOOL "" FORCE )
endif ()

set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
FetchContent_Declare(
        milvus-storage
        GIT_REPOSITORY  ${GIT_REPOSITORY}
        GIT_TAG         ${milvus-storage_VERSION}
        SOURCE_DIR      ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src
        BINARY_DIR      ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build
        SOURCE_SUBDIR   cpp
        DOWNLOAD_DIR    ${THIRDPARTY_DOWNLOAD_PATH} )

FetchContent_GetProperties( milvus-storage )
if ( NOT milvus-storage_POPULATED )
    FetchContent_Populate( milvus-storage )

    # Adding the following target:
    # milvus-storage
    add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp
                      ${milvus-storage_BINARY_DIR} )
endif()

set( MILVUS_STORAGE_INCLUDE_DIR ${milvus-storage_SOURCE_DIR}/cpp/include CACHE INTERNAL "Path to milvus-storage include directory" )
|
|
@ -0,0 +1,9 @@
|
|||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: Milvus Storage
|
||||
Description: Storage modules for Milvus
|
||||
Version: @MILVUS_VERSION@
|
||||
|
||||
Libs: -L${libdir} -lmilvus-storage
|
||||
Cflags: -I${includedir}
|
|
@ -16,6 +16,7 @@ include_directories(
|
|||
${SIMDJSON_INCLUDE_DIR}
|
||||
${TANTIVY_INCLUDE_DIR}
|
||||
${CONAN_INCLUDE_DIRS}
|
||||
${MILVUS_STORAGE_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
add_definitions(-DMILVUS_TEST_SEGCORE_YAML_PATH="${CMAKE_SOURCE_DIR}/unittest/test_utils/test_segcore.yaml")
|
||||
|
@ -157,6 +158,7 @@ if (LINUX)
|
|||
gtest
|
||||
milvus_core
|
||||
knowhere
|
||||
milvus-storage
|
||||
)
|
||||
install(TARGETS index_builder_test DESTINATION unittest)
|
||||
endif()
|
||||
|
@ -169,6 +171,7 @@ target_link_libraries(all_tests
|
|||
gtest
|
||||
milvus_core
|
||||
knowhere
|
||||
milvus-storage
|
||||
)
|
||||
|
||||
install(TARGETS all_tests DESTINATION unittest)
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
reviewers:
|
||||
- tedxu
|
||||
- shaoting-huang
|
||||
- sunby
|
||||
|
||||
approvers:
|
||||
- maintainers
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package arrowutil
|
||||
|
||||
import (
	"context"
	"fmt"

	"github.com/apache/arrow/go/v12/arrow/array"
	"github.com/apache/arrow/go/v12/arrow/memory"
	"github.com/apache/arrow/go/v12/parquet/file"
	"github.com/apache/arrow/go/v12/parquet/pqarrow"

	"github.com/milvus-io/milvus/internal/storagev2/common/constant"
	"github.com/milvus-io/milvus/internal/storagev2/io/fs"
	"github.com/milvus-io/milvus/internal/storagev2/storage/options"
)
|
||||
|
||||
func MakeArrowFileReader(fs fs.Fs, filePath string) (*pqarrow.FileReader, error) {
|
||||
f, err := fs.OpenFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parquetReader, err := file.NewParquetReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator)
|
||||
}
|
||||
|
||||
func MakeArrowRecordReader(reader *pqarrow.FileReader, opts *options.ReadOptions) (array.RecordReader, error) {
|
||||
var rowGroupsIndices []int
|
||||
var columnIndices []int
|
||||
metadata := reader.ParquetReader().MetaData()
|
||||
for _, c := range opts.Columns {
|
||||
columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(c))
|
||||
}
|
||||
for _, f := range opts.Filters {
|
||||
columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(f.GetColumnName()))
|
||||
}
|
||||
|
||||
for i := 0; i < len(metadata.RowGroups); i++ {
|
||||
rg := metadata.RowGroup(i)
|
||||
var canIgnored bool
|
||||
for _, filter := range opts.Filters {
|
||||
columnIndex := rg.Schema.ColumnIndexByName(filter.GetColumnName())
|
||||
columnChunk, err := rg.ColumnChunk(columnIndex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
columnStats, err := columnChunk.Statistics()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if columnStats == nil || !columnStats.HasMinMax() {
|
||||
continue
|
||||
}
|
||||
if filter.CheckStatistics(columnStats) {
|
||||
canIgnored = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !canIgnored {
|
||||
rowGroupsIndices = append(rowGroupsIndices, i)
|
||||
}
|
||||
}
|
||||
|
||||
return reader.GetRecordReader(context.TODO(), columnIndices, rowGroupsIndices)
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package constant
|
||||
|
||||
// ReadBatchSize is the batch size, in rows, used when reading Arrow records.
const ReadBatchSize = 1024

// File-name suffixes.
const (
	ManifestTempFileSuffix = ".manifest.tmp"
	ManifestFileSuffix     = ".manifest"
	ParquetDataFileSuffix  = ".parquet"
)

// Directory names.
const (
	ManifestDir   = "versions"
	BlobDir       = "blobs"
	VectorDataDir = "vector"
	ScalarDataDir = "scalar"
	DeleteDataDir = "delete"
)

// Miscellaneous names and sentinel values.
const (
	OffsetFieldName       = "__offset"
	LatestManifestVersion = -1
	EndpointOverride      = "endpoint_override"
)
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package errors
|
||||
|
||||
import "github.com/cockroachdb/errors"
|
||||
|
||||
var (
|
||||
ErrSchemaIsNil = errors.New("schema is nil")
|
||||
ErrBlobAlreadyExist = errors.New("blob already exist")
|
||||
ErrBlobNotExist = errors.New("blob not exist")
|
||||
ErrSchemaNotMatch = errors.New("schema not match")
|
||||
ErrColumnNotExist = errors.New("column not exist")
|
||||
ErrInvalidPath = errors.New("invalid path")
|
||||
ErrNoEndpoint = errors.New("no endpoint is specified")
|
||||
)
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package log
|
||||
|
||||
import "go.uber.org/zap"
|
||||
|
||||
var (
|
||||
// not lint
|
||||
Skip = zap.Skip
|
||||
Binary = zap.Binary
|
||||
Bool = zap.Bool
|
||||
Boolp = zap.Boolp
|
||||
ByteString = zap.ByteString
|
||||
Complex128 = zap.Complex128
|
||||
Complex128p = zap.Complex128p
|
||||
Complex64 = zap.Complex64
|
||||
Complex64p = zap.Complex64p
|
||||
Float64 = zap.Float64
|
||||
Float64p = zap.Float64p
|
||||
Float32 = zap.Float32
|
||||
Float32p = zap.Float32p
|
||||
Int = zap.Int
|
||||
Intp = zap.Intp
|
||||
Int64 = zap.Int64
|
||||
Int64p = zap.Int64p
|
||||
Int32 = zap.Int32
|
||||
Int32p = zap.Int32p
|
||||
Int16 = zap.Int16
|
||||
Int16p = zap.Int16p
|
||||
Int8 = zap.Int8
|
||||
Int8p = zap.Int8p
|
||||
String = zap.String
|
||||
Stringp = zap.Stringp
|
||||
Uint = zap.Uint
|
||||
Uintp = zap.Uintp
|
||||
Uint64 = zap.Uint64
|
||||
Uint64p = zap.Uint64p
|
||||
Uint32 = zap.Uint32
|
||||
Uint32p = zap.Uint32p
|
||||
Uint16 = zap.Uint16
|
||||
Uint16p = zap.Uint16p
|
||||
Uint8 = zap.Uint8
|
||||
Uint8p = zap.Uint8p
|
||||
Uintptr = zap.Uintptr
|
||||
Uintptrp = zap.Uintptrp
|
||||
Reflect = zap.Reflect
|
||||
Namespace = zap.Namespace
|
||||
Stringer = zap.Stringer
|
||||
Time = zap.Time
|
||||
Timep = zap.Timep
|
||||
Stack = zap.Stack
|
||||
StackSkip = zap.StackSkip
|
||||
Duration = zap.Duration
|
||||
Durationp = zap.Durationp
|
||||
Object = zap.Object
|
||||
Inline = zap.Inline
|
||||
Any = zap.Any
|
||||
)
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package log
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
)
|
||||
|
||||
type Level = zapcore.Level
|
||||
|
||||
const (
|
||||
DebugLevel = zapcore.DebugLevel
|
||||
InfoLevel = zapcore.InfoLevel
|
||||
WarnLevel = zapcore.WarnLevel
|
||||
ErrorLevel = zapcore.ErrorLevel
|
||||
PanicLevel = zapcore.PanicLevel
|
||||
FatalLevel = zapcore.FatalLevel
|
||||
)
|
||||
|
||||
type Logger struct {
|
||||
l *zap.Logger
|
||||
al *zap.AtomicLevel
|
||||
}
|
||||
|
||||
func New(out io.Writer, level Level) *Logger {
|
||||
if out == nil {
|
||||
out = os.Stderr
|
||||
}
|
||||
|
||||
al := zap.NewAtomicLevelAt(level)
|
||||
cfg := zap.NewDevelopmentEncoderConfig()
|
||||
|
||||
core := zapcore.NewCore(
|
||||
zapcore.NewConsoleEncoder(cfg),
|
||||
zapcore.AddSync(out),
|
||||
al,
|
||||
)
|
||||
return &Logger{l: zap.New(core, zap.AddCaller(), zap.AddCallerSkip(2)), al: &al}
|
||||
}
|
||||
|
||||
func (l *Logger) SetLevel(level Level) {
|
||||
if l.al != nil {
|
||||
l.al.SetLevel(level)
|
||||
}
|
||||
}
|
||||
|
||||
type Field = zap.Field
|
||||
|
||||
func (l *Logger) Debug(msg string, fields ...Field) {
|
||||
l.l.Debug(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Info(msg string, fields ...Field) {
|
||||
l.l.Info(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Warn(msg string, fields ...Field) {
|
||||
l.l.Warn(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Error(msg string, fields ...Field) {
|
||||
l.l.Error(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Panic(msg string, fields ...Field) {
|
||||
l.l.Panic(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Fatal(msg string, fields ...Field) {
|
||||
l.l.Fatal(msg, fields...)
|
||||
}
|
||||
|
||||
func (l *Logger) Sync() error {
|
||||
return l.l.Sync()
|
||||
}
|
||||
|
||||
var std = New(os.Stderr, DebugLevel)
|
||||
|
||||
func Default() *Logger { return std }
|
||||
func ReplaceDefault(l *Logger) { std = l }
|
||||
func SetLevel(level Level) { std.SetLevel(level) }
|
||||
|
||||
func Debug(msg string, fields ...Field) { std.Debug(msg, fields...) }
|
||||
func Info(msg string, fields ...Field) { std.Info(msg, fields...) }
|
||||
func Warn(msg string, fields ...Field) { std.Warn(msg, fields...) }
|
||||
func Error(msg string, fields ...Field) { std.Error(msg, fields...) }
|
||||
func Panic(msg string, fields ...Field) { std.Panic(msg, fields...) }
|
||||
func Fatal(msg string, fields ...Field) { std.Fatal(msg, fields...) }
|
||||
|
||||
func Sync() error { return std.Sync() }
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package log
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLogger(t *testing.T) {
|
||||
defer Sync()
|
||||
Info("Testing")
|
||||
Debug("Testing")
|
||||
Warn("Testing")
|
||||
Error("Testing")
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
Debug("logPanic recover")
|
||||
}
|
||||
}()
|
||||
Panic("Testing")
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package log
|
||||
|
||||
import "go.uber.org/zap"
|
||||
|
||||
type Option = zap.Option
|
||||
|
||||
var (
|
||||
WrapCore = zap.WrapCore
|
||||
Hooks = zap.Hooks
|
||||
Fields = zap.Fields
|
||||
ErrorOutput = zap.ErrorOutput
|
||||
Development = zap.Development
|
||||
AddCaller = zap.AddCaller
|
||||
WithCaller = zap.WithCaller
|
||||
AddCallerSkip = zap.AddCallerSkip
|
||||
AddStacktrace = zap.AddStacktrace
|
||||
IncreaseLevel = zap.IncreaseLevel
|
||||
WithFatalHook = zap.WithFatalHook
|
||||
WithClock = zap.WithClock
|
||||
)
|
|
@ -0,0 +1,404 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/endian"
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
)
|
||||
|
||||
var ErrInvalidArgument = errors.New("invalid argument")
|
||||
|
||||
func ToProtobufType(dataType arrow.Type) (storagev2pb.LogicType, error) {
|
||||
typeId := int(dataType)
|
||||
if typeId < 0 || typeId >= int(storagev2pb.LogicType_MAX_ID) {
|
||||
return storagev2pb.LogicType_NA, fmt.Errorf("parse data type %v: %w", dataType, ErrInvalidArgument)
|
||||
}
|
||||
return storagev2pb.LogicType(typeId), nil
|
||||
}
|
||||
|
||||
func ToProtobufMetadata(metadata *arrow.Metadata) (*storagev2pb.KeyValueMetadata, error) {
|
||||
keys := metadata.Keys()
|
||||
values := metadata.Values()
|
||||
return &storagev2pb.KeyValueMetadata{Keys: keys, Values: values}, nil
|
||||
}
|
||||
|
||||
func ToProtobufDataType(dataType arrow.DataType) (*storagev2pb.DataType, error) {
|
||||
protoType := &storagev2pb.DataType{}
|
||||
err := SetTypeValues(protoType, dataType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
logicType, err := ToProtobufType(dataType.ID())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
protoType.LogicType = logicType
|
||||
|
||||
if len(GetFields(dataType)) > 0 {
|
||||
for _, field := range GetFields(dataType) {
|
||||
fieldCopy := field
|
||||
protoFieldType, err := ToProtobufField(&fieldCopy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
protoType.Children = append(protoType.Children, protoFieldType)
|
||||
}
|
||||
}
|
||||
|
||||
return protoType, nil
|
||||
}
|
||||
|
||||
// GetFields TODO CHECK MORE TYPES
|
||||
func GetFields(dataType arrow.DataType) []arrow.Field {
|
||||
switch dataType.ID() {
|
||||
case arrow.LIST:
|
||||
listType, _ := dataType.(*arrow.ListType)
|
||||
return listType.Fields()
|
||||
case arrow.STRUCT:
|
||||
structType, _ := dataType.(*arrow.StructType)
|
||||
return structType.Fields()
|
||||
case arrow.MAP:
|
||||
mapType, _ := dataType.(*arrow.MapType)
|
||||
return mapType.Fields()
|
||||
case arrow.FIXED_SIZE_LIST:
|
||||
listType, _ := dataType.(*arrow.FixedSizeListType)
|
||||
return listType.Fields()
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func ToProtobufField(field *arrow.Field) (*storagev2pb.Field, error) {
|
||||
protoField := &storagev2pb.Field{}
|
||||
protoField.Name = field.Name
|
||||
protoField.Nullable = field.Nullable
|
||||
|
||||
if field.Metadata.Len() != 0 {
|
||||
fieldMetadata, err := ToProtobufMetadata(&field.Metadata)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("convert to protobuf field: %w", err)
|
||||
}
|
||||
protoField.Metadata = fieldMetadata
|
||||
}
|
||||
|
||||
dataType, err := ToProtobufDataType(field.Type)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("convert to protobuf field: %w", err)
|
||||
}
|
||||
protoField.DataType = dataType
|
||||
return protoField, nil
|
||||
}
|
||||
|
||||
func SetTypeValues(protoType *storagev2pb.DataType, dataType arrow.DataType) error {
|
||||
switch dataType.ID() {
|
||||
case arrow.FIXED_SIZE_BINARY:
|
||||
realType, ok := dataType.(*arrow.FixedSizeBinaryType)
|
||||
if !ok {
|
||||
return fmt.Errorf("convert to fixed size binary type: %w", ErrInvalidArgument)
|
||||
}
|
||||
fixedSizeBinaryType := &storagev2pb.FixedSizeBinaryType{}
|
||||
fixedSizeBinaryType.ByteWidth = int32(realType.ByteWidth)
|
||||
protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeBinaryType{FixedSizeBinaryType: fixedSizeBinaryType}
|
||||
case arrow.FIXED_SIZE_LIST:
|
||||
realType, ok := dataType.(*arrow.FixedSizeListType)
|
||||
if !ok {
|
||||
return fmt.Errorf("convert to fixed size list type: %w", ErrInvalidArgument)
|
||||
}
|
||||
fixedSizeListType := &storagev2pb.FixedSizeListType{}
|
||||
fixedSizeListType.ListSize = realType.Len()
|
||||
protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeListType{FixedSizeListType: fixedSizeListType}
|
||||
case arrow.DICTIONARY:
|
||||
realType, ok := dataType.(*arrow.DictionaryType)
|
||||
if !ok {
|
||||
return fmt.Errorf("convert to dictionary type: %w", ErrInvalidArgument)
|
||||
}
|
||||
dictionaryType := &storagev2pb.DictionaryType{}
|
||||
indexType, err := ToProtobufDataType(realType.IndexType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dictionaryType.IndexType = indexType
|
||||
valueType, err := ToProtobufDataType(realType.ValueType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dictionaryType.ValueType = valueType
|
||||
dictionaryType.Ordered = realType.Ordered
|
||||
protoType.TypeRelatedValues = &storagev2pb.DataType_DictionaryType{DictionaryType: dictionaryType}
|
||||
|
||||
case arrow.MAP:
|
||||
realType, ok := dataType.(*arrow.MapType)
|
||||
if !ok {
|
||||
return fmt.Errorf("convert to map type: %w", ErrInvalidArgument)
|
||||
}
|
||||
mapType := &storagev2pb.MapType{}
|
||||
mapType.KeysSorted = realType.KeysSorted
|
||||
protoType.TypeRelatedValues = &storagev2pb.DataType_MapType{MapType: mapType}
|
||||
|
||||
default:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func ToProtobufSchema(schema *arrow.Schema) (*storagev2pb.ArrowSchema, error) {
|
||||
protoSchema := &storagev2pb.ArrowSchema{}
|
||||
for _, field := range schema.Fields() {
|
||||
fieldCopy := field
|
||||
protoField, err := ToProtobufField(&fieldCopy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
protoSchema.Fields = append(protoSchema.Fields, protoField)
|
||||
}
|
||||
if schema.Endianness() == endian.LittleEndian {
|
||||
protoSchema.Endianness = storagev2pb.Endianness_Little
|
||||
} else if schema.Endianness() == endian.BigEndian {
|
||||
protoSchema.Endianness = storagev2pb.Endianness_Big
|
||||
}
|
||||
|
||||
// TODO FIX ME: golang proto not support proto_schema->mutable_metadata()->add_keys(key);
|
||||
if schema.HasMetadata() && !schema.HasMetadata() {
|
||||
for _, key := range schema.Metadata().Keys() {
|
||||
protoKeyValue := protoSchema.GetMetadata()
|
||||
protoKeyValue.Keys = append(protoKeyValue.Keys, key)
|
||||
}
|
||||
for _, value := range schema.Metadata().Values() {
|
||||
protoKeyValue := protoSchema.GetMetadata()
|
||||
protoKeyValue.Values = append(protoKeyValue.Values, value)
|
||||
}
|
||||
}
|
||||
|
||||
return protoSchema, nil
|
||||
}
|
||||
|
||||
func FromProtobufSchema(schema *storagev2pb.ArrowSchema) (*arrow.Schema, error) {
|
||||
fields := make([]arrow.Field, 0, len(schema.Fields))
|
||||
for _, field := range schema.Fields {
|
||||
tmp, err := FromProtobufField(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fields = append(fields, *tmp)
|
||||
}
|
||||
tmp, err := FromProtobufKeyValueMetadata(schema.Metadata)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newSchema := arrow.NewSchema(fields, tmp)
|
||||
return newSchema, nil
|
||||
}
|
||||
|
||||
func FromProtobufField(field *storagev2pb.Field) (*arrow.Field, error) {
|
||||
datatype, err := FromProtobufDataType(field.DataType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metadata, err := FromProtobufKeyValueMetadata(field.GetMetadata())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &arrow.Field{Name: field.Name, Type: datatype, Nullable: field.Nullable, Metadata: *metadata}, nil
|
||||
}
|
||||
|
||||
func FromProtobufKeyValueMetadata(metadata *storagev2pb.KeyValueMetadata) (*arrow.Metadata, error) {
|
||||
keys := make([]string, 0)
|
||||
values := make([]string, 0)
|
||||
if metadata != nil {
|
||||
keys = metadata.Keys
|
||||
values = metadata.Values
|
||||
}
|
||||
newMetadata := arrow.NewMetadata(keys, values)
|
||||
return &newMetadata, nil
|
||||
}
|
||||
|
||||
func FromProtobufDataType(dataType *storagev2pb.DataType) (arrow.DataType, error) {
|
||||
switch dataType.LogicType {
|
||||
case storagev2pb.LogicType_NA:
|
||||
return &arrow.NullType{}, nil
|
||||
case storagev2pb.LogicType_BOOL:
|
||||
return &arrow.BooleanType{}, nil
|
||||
case storagev2pb.LogicType_UINT8:
|
||||
return &arrow.Uint8Type{}, nil
|
||||
case storagev2pb.LogicType_INT8:
|
||||
return &arrow.Int8Type{}, nil
|
||||
case storagev2pb.LogicType_UINT16:
|
||||
return &arrow.Uint16Type{}, nil
|
||||
case storagev2pb.LogicType_INT16:
|
||||
return &arrow.Int16Type{}, nil
|
||||
case storagev2pb.LogicType_UINT32:
|
||||
return &arrow.Uint32Type{}, nil
|
||||
case storagev2pb.LogicType_INT32:
|
||||
return &arrow.Int32Type{}, nil
|
||||
case storagev2pb.LogicType_UINT64:
|
||||
return &arrow.Uint64Type{}, nil
|
||||
case storagev2pb.LogicType_INT64:
|
||||
return &arrow.Int64Type{}, nil
|
||||
case storagev2pb.LogicType_HALF_FLOAT:
|
||||
return &arrow.Float16Type{}, nil
|
||||
case storagev2pb.LogicType_FLOAT:
|
||||
return &arrow.Float32Type{}, nil
|
||||
case storagev2pb.LogicType_DOUBLE:
|
||||
return &arrow.Float64Type{}, nil
|
||||
case storagev2pb.LogicType_STRING:
|
||||
return &arrow.StringType{}, nil
|
||||
case storagev2pb.LogicType_BINARY:
|
||||
return &arrow.BinaryType{}, nil
|
||||
|
||||
case storagev2pb.LogicType_LIST:
|
||||
fieldType, err := FromProtobufField(dataType.Children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
listType := arrow.ListOf(fieldType.Type)
|
||||
return listType, nil
|
||||
|
||||
case storagev2pb.LogicType_STRUCT:
|
||||
fields := make([]arrow.Field, 0, len(dataType.Children))
|
||||
for _, child := range dataType.Children {
|
||||
field, err := FromProtobufField(child)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fields = append(fields, *field)
|
||||
}
|
||||
structType := arrow.StructOf(fields...)
|
||||
return structType, nil
|
||||
|
||||
case storagev2pb.LogicType_DICTIONARY:
|
||||
keyType, err := FromProtobufField(dataType.Children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
valueType, err := FromProtobufField(dataType.Children[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dictType := &arrow.DictionaryType{
|
||||
IndexType: keyType.Type,
|
||||
ValueType: valueType.Type,
|
||||
}
|
||||
return dictType, nil
|
||||
|
||||
case storagev2pb.LogicType_MAP:
|
||||
fieldType, err := FromProtobufField(dataType.Children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// TODO FIX ME
|
||||
return arrow.MapOf(fieldType.Type, fieldType.Type), nil
|
||||
|
||||
case storagev2pb.LogicType_FIXED_SIZE_BINARY:
|
||||
|
||||
sizeBinaryType := arrow.FixedSizeBinaryType{ByteWidth: int(dataType.GetFixedSizeBinaryType().ByteWidth)}
|
||||
return &sizeBinaryType, nil
|
||||
|
||||
case storagev2pb.LogicType_FIXED_SIZE_LIST:
|
||||
fieldType, err := FromProtobufField(dataType.Children[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fixedSizeListType := arrow.FixedSizeListOf(int32(int(dataType.GetFixedSizeListType().ListSize)), fieldType.Type)
|
||||
return fixedSizeListType, nil
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("parse protobuf datatype: %w", ErrInvalidArgument)
|
||||
}
|
||||
}
|
||||
|
||||
func GetNewParquetFilePath(path string) string {
|
||||
scalarFileId := uuid.New()
|
||||
path = filepath.Join(path, scalarFileId.String()+constant.ParquetDataFileSuffix)
|
||||
return path
|
||||
}
|
||||
|
||||
func GetManifestFilePath(path string, version int64) string {
|
||||
path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestFileSuffix)
|
||||
return path
|
||||
}
|
||||
|
||||
func GetManifestTmpFilePath(path string, version int64) string {
|
||||
path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestTempFileSuffix)
|
||||
return path
|
||||
}
|
||||
|
||||
func GetBlobFilePath(path string) string {
|
||||
blobId := uuid.New()
|
||||
return filepath.Join(GetBlobDir(path), blobId.String())
|
||||
}
|
||||
|
||||
func GetManifestDir(path string) string {
|
||||
path = filepath.Join(path, constant.ManifestDir)
|
||||
return path
|
||||
}
|
||||
|
||||
func GetVectorDataDir(path string) string {
|
||||
return filepath.Join(path, constant.VectorDataDir)
|
||||
}
|
||||
|
||||
func GetScalarDataDir(path string) string {
|
||||
return filepath.Join(path, constant.ScalarDataDir)
|
||||
}
|
||||
|
||||
func GetBlobDir(path string) string {
|
||||
return filepath.Join(path, constant.BlobDir)
|
||||
}
|
||||
|
||||
func GetDeleteDataDir(path string) string {
|
||||
return filepath.Join(path, constant.DeleteDataDir)
|
||||
}
|
||||
|
||||
func ParseVersionFromFileName(path string) int64 {
|
||||
pos := strings.Index(path, constant.ManifestFileSuffix)
|
||||
if pos == -1 || !strings.HasSuffix(path, constant.ManifestFileSuffix) {
|
||||
log.Warn("manifest file suffix not match", log.String("path", path))
|
||||
return -1
|
||||
}
|
||||
version := path[0:pos]
|
||||
versionInt, err := strconv.ParseInt(version, 10, 64)
|
||||
if err != nil {
|
||||
log.Error("parse version from file name error", log.String("path", path), log.String("version", version))
|
||||
return -1
|
||||
}
|
||||
return versionInt
|
||||
}
|
||||
|
||||
func ProjectSchema(sc *arrow.Schema, columns []string) *arrow.Schema {
|
||||
var fields []arrow.Field
|
||||
for _, field := range sc.Fields() {
|
||||
for _, column := range columns {
|
||||
if field.Name == column {
|
||||
fields = append(fields, field)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return arrow.NewSchema(fields, nil)
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
|
||||
|
||||
**Storage layer interface**: supplies the storage reader and writer, which accept read options. It maintains the storage metadata and handles atomic reads and writes across multiple files on disk (which may be in different formats).
|
||||
|
||||
---
|
||||
|
||||
**File Reader/Writer interface**: receives data and read options from the upper layer and converts the raw data into our defined data format.
|
||||
|
||||
---
|
||||
|
||||
**File Format Reader/Writer**: the reader/writer for a specific file format (e.g. Parquet, raw, or others such as ORC).
|
||||
|
||||
---
|
||||
|
||||
**File system interface**: supports different file systems (e.g. in-memory, AWS, MinIO, POSIX, Windows).
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package blob
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
)
|
||||
|
||||
type Blob struct {
|
||||
Name string
|
||||
Size int64
|
||||
File string
|
||||
}
|
||||
|
||||
func (b Blob) ToProtobuf() *storagev2pb.Blob {
|
||||
blob := &storagev2pb.Blob{}
|
||||
blob.Name = b.Name
|
||||
blob.Size = b.Size
|
||||
blob.File = b.File
|
||||
return blob
|
||||
}
|
||||
|
||||
func FromProtobuf(blob *storagev2pb.Blob) Blob {
|
||||
return Blob{
|
||||
Name: blob.Name,
|
||||
Size: blob.Size,
|
||||
File: blob.File,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fragment
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type (
|
||||
pkType any
|
||||
DeleteFragmentVector []DeleteFragment
|
||||
DeleteFragment struct {
|
||||
id int64
|
||||
schema *schema.Schema
|
||||
fs fs.Fs
|
||||
data map[pkType][]int64
|
||||
}
|
||||
)
|
||||
|
||||
func NewDeleteFragment(id int64, schema *schema.Schema, fs fs.Fs) *DeleteFragment {
|
||||
return &DeleteFragment{
|
||||
id: id,
|
||||
schema: schema,
|
||||
fs: fs,
|
||||
data: make(map[pkType][]int64),
|
||||
}
|
||||
}
|
||||
|
||||
func Make(f fs.Fs, s *schema.Schema, frag Fragment) DeleteFragment {
|
||||
// TODO: implement
|
||||
panic("implement me")
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fragment
|
||||
|
||||
import "github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
|
||||
// FragmentType identifies the kind of a fragment.
type FragmentType int32

// Known fragment kinds, numbered 0, 1 and 2.
const (
	kUnknown FragmentType = iota
	kData
	kDelete
)

// Fragment is an identified group of files.
type Fragment struct {
	fragmentId int64
	files      []string
}

// FragmentVector is a list of fragments.
type FragmentVector []Fragment

// ToFilesVector concatenates the files of all fragments, in order. The result
// is never nil, even when there are no files.
func ToFilesVector(fragments []Fragment) []string {
	total := 0
	for i := range fragments {
		total += len(fragments[i].files)
	}

	allFiles := make([]string, 0, total)
	for i := range fragments {
		allFiles = append(allFiles, fragments[i].files...)
	}
	return allFiles
}

// NewFragment returns a fragment with id 0 and an empty, non-nil file list.
func NewFragment() Fragment {
	var fragment Fragment
	fragment.files = make([]string, 0)
	return fragment
}

// AddFile appends file to the fragment's file list.
func (f *Fragment) AddFile(file string) {
	f.files = append(f.files, file)
}

// Files returns the fragment's file list. The slice is not copied.
func (f *Fragment) Files() []string {
	return f.files
}

// FragmentId returns the fragment's id.
func (f *Fragment) FragmentId() int64 {
	return f.fragmentId
}

// SetFragmentId sets the fragment's id.
func (f *Fragment) SetFragmentId(fragmentId int64) {
	f.fragmentId = fragmentId
}
|
||||
|
||||
func (f *Fragment) ToProtobuf() *storagev2pb.Fragment {
|
||||
fragment := &storagev2pb.Fragment{}
|
||||
fragment.Id = f.fragmentId
|
||||
fragment.Files = append(fragment.Files, f.files...)
|
||||
return fragment
|
||||
}
|
||||
|
||||
func FromProtobuf(fragment *storagev2pb.Fragment) Fragment {
|
||||
newFragment := NewFragment()
|
||||
newFragment.SetFragmentId(fragment.GetId())
|
||||
newFragment.files = append(newFragment.files, fragment.Files...)
|
||||
return newFragment
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
type ConjunctionAndFilter struct {
|
||||
filters []Filter
|
||||
columnName string
|
||||
}
|
||||
|
||||
func (f *ConjunctionAndFilter) GetColumnName() string {
|
||||
return f.columnName
|
||||
}
|
||||
|
||||
// FIXME: should have 3 cases.
|
||||
// 1. all records satisfy the filter, this group dont need to check filter again.
|
||||
// 2. no record satisfies the filter.
|
||||
// 3. some records satisfy the filter, this group should check filter again.
|
||||
func (f *ConjunctionAndFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
|
||||
for _, filter := range f.filters {
|
||||
if filter.CheckStatistics(stats) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (f *ConjunctionAndFilter) Type() FilterType {
|
||||
return And
|
||||
}
|
||||
|
||||
func (f *ConjunctionAndFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
|
||||
for i := 0; i < len(f.filters); i++ {
|
||||
f.filters[i].Apply(colData, filterBitSet)
|
||||
}
|
||||
}
|
||||
|
||||
type ConjunctionOrFilter struct {
|
||||
filters []Filter
|
||||
}
|
||||
|
||||
func (f *ConjunctionOrFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
|
||||
for _, filter := range f.filters {
|
||||
if !filter.CheckStatistics(stats) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (f *ConjunctionOrFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
|
||||
orBitSet := bitset.New(filterBitSet.Len())
|
||||
for i := 1; i < len(f.filters); i++ {
|
||||
childBitSet := filterBitSet.Clone()
|
||||
f.filters[i].Apply(colData, childBitSet)
|
||||
orBitSet.Intersection(childBitSet)
|
||||
}
|
||||
filterBitSet.Union(orBitSet)
|
||||
}
|
||||
|
||||
func (f *ConjunctionOrFilter) Type() FilterType {
|
||||
return Or
|
||||
}
|
||||
|
||||
func NewConjunctionAndFilter(filters ...Filter) *ConjunctionAndFilter {
|
||||
return &ConjunctionAndFilter{filters: filters}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/parquet"
|
||||
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
// ConstantFilter compares the values of one column against a constant.
type ConstantFilter struct {
	// cmpType is the comparison operator.
	cmpType ComparisonType
	// value is the constant operand; its dynamic type is type-asserted
	// against the column type when the filter is evaluated.
	value interface{}
	// columnName is the name of the column this filter targets.
	columnName string
}
|
||||
|
||||
// GetColumnName returns the name of the column this filter targets.
func (f *ConstantFilter) GetColumnName() string {
	return f.columnName
}
|
||||
|
||||
func (f *ConstantFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
|
||||
// FIXME: value may be int8/uint8/...., we should encapsulate the value type, now we just do type assertion for prototype
|
||||
switch stats.Type() {
|
||||
case parquet.Types.Int32:
|
||||
i32stats := stats.(*metadata.Int32Statistics)
|
||||
if i32stats.HasMinMax() {
|
||||
return checkStats(f.value.(int32), i32stats.Min(), i32stats.Max(), f.cmpType)
|
||||
}
|
||||
case parquet.Types.Int64:
|
||||
i64stats := stats.(*metadata.Int64Statistics)
|
||||
if i64stats.HasMinMax() {
|
||||
return checkStats(f.value.(int64), i64stats.Min(), i64stats.Max(), f.cmpType)
|
||||
}
|
||||
case parquet.Types.Float:
|
||||
floatstats := stats.(*metadata.Float32Statistics)
|
||||
if floatstats.HasMinMax() {
|
||||
return checkStats(f.value.(float32), floatstats.Min(), floatstats.Max(), f.cmpType)
|
||||
}
|
||||
case parquet.Types.Double:
|
||||
doublestats := stats.(*metadata.Float64Statistics)
|
||||
if doublestats.HasMinMax() {
|
||||
return checkStats(f.value.(float64), doublestats.Min(), doublestats.Max(), f.cmpType)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// comparableValue is the set of value types checkStats can compare against
// min/max statistics.
type comparableValue interface {
	int32 | int64 | float32 | float64
}
|
||||
|
||||
func checkStats[T comparableValue](value, min, max T, cmpType ComparisonType) bool {
|
||||
switch cmpType {
|
||||
case Equal:
|
||||
return value < min || value > max
|
||||
case NotEqual:
|
||||
return value == min && value == max
|
||||
case LessThan:
|
||||
return value <= min
|
||||
case LessThanOrEqual:
|
||||
return value < min
|
||||
case GreaterThan:
|
||||
return value >= max
|
||||
case GreaterThanOrEqual:
|
||||
return value > max
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Apply evaluates the filter against every value of colData and records the
// result in filterBitSet via filterColumn.
//
// The concrete array type selects the Go type the constant is asserted to.
// The assertions are unchecked, so a constant whose dynamic type differs from
// the column's element type panics. Array types not listed below are ignored
// and leave filterBitSet untouched.
func (f *ConstantFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
	switch data := colData.(type) {
	case *array.Int8:
		filterColumn(f.value.(int8), data.Int8Values(), f.cmpType, filterBitSet)
	case *array.Uint8:
		filterColumn(f.value.(uint8), data.Uint8Values(), f.cmpType, filterBitSet)
	case *array.Int16:
		filterColumn(f.value.(int16), data.Int16Values(), f.cmpType, filterBitSet)
	case *array.Uint16:
		filterColumn(f.value.(uint16), data.Uint16Values(), f.cmpType, filterBitSet)
	case *array.Int32:
		filterColumn(f.value.(int32), data.Int32Values(), f.cmpType, filterBitSet)
	case *array.Uint32:
		filterColumn(f.value.(uint32), data.Uint32Values(), f.cmpType, filterBitSet)
	case *array.Int64:
		filterColumn(f.value.(int64), data.Int64Values(), f.cmpType, filterBitSet)
	case *array.Uint64:
		filterColumn(f.value.(uint64), data.Uint64Values(), f.cmpType, filterBitSet)
	case *array.Float32:
		filterColumn(f.value.(float32), data.Float32Values(), f.cmpType, filterBitSet)
	case *array.Float64:
		filterColumn(f.value.(float64), data.Float64Values(), f.cmpType, filterBitSet)
	}
}
|
||||
|
||||
// comparableColumnType is the set of column element types that filterColumn
// and checkColumn can compare.
type comparableColumnType interface {
	int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64
}
|
||||
|
||||
func filterColumn[T comparableColumnType](value T, targets []T, cmpType ComparisonType, filterBitSet *bitset.BitSet) {
|
||||
for i, target := range targets {
|
||||
if checkColumn(value, target, cmpType) {
|
||||
filterBitSet.Set(uint(i))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func checkColumn[T comparableColumnType](value, target T, cmpType ComparisonType) bool {
|
||||
switch cmpType {
|
||||
case Equal:
|
||||
return value != target
|
||||
case NotEqual:
|
||||
return value == target
|
||||
case LessThan:
|
||||
return value <= target
|
||||
case LessThanOrEqual:
|
||||
return value < target
|
||||
case GreaterThan:
|
||||
return value >= target
|
||||
case GreaterThanOrEqual:
|
||||
return value > target
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Type reports the kind of this filter, which is always Constant.
func (f *ConstantFilter) Type() FilterType {
	return Constant
}
|
||||
|
||||
func NewConstantFilter(cmpType ComparisonType, columnName string, value interface{}) *ConstantFilter {
|
||||
return &ConstantFilter{
|
||||
cmpType: cmpType,
|
||||
columnName: columnName,
|
||||
value: value,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package filter
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
// FilterType identifies the kind of a Filter implementation.
type FilterType int8

// Filter kinds, as returned by Filter.Type.
const (
	// And is reported by ConjunctionAndFilter.
	And FilterType = iota
	// Or is reported by ConjunctionOrFilter.
	Or
	// Constant is reported by ConstantFilter.
	Constant
	Range
)
|
||||
|
||||
// Filter is a predicate over one column that can be evaluated both against
// column-chunk statistics and against the column data itself.
type Filter interface {
	// CheckStatistics inspects column statistics. The parquet reader ignores
	// a row group when this returns true for it.
	CheckStatistics(metadata.TypedStatistics) bool
	// Type reports the kind of the filter.
	Type() FilterType
	// Apply evaluates the filter on colData and sets bits in filterBitSet.
	// The parquet reader drops the rows whose bit is set.
	Apply(colData arrow.Array, filterBitSet *bitset.BitSet)
	// GetColumnName returns the name of the column the filter targets.
	GetColumnName() string
}
|
||||
|
||||
// ComparisonType is the comparison operator of a ConstantFilter.
type ComparisonType int8

// Comparison operators. As implemented by ConstantFilter, each one describes
// the predicate "column value <op> constant"; rows that fail the predicate
// are marked in the filter bitset.
const (
	Equal ComparisonType = iota
	NotEqual
	LessThan
	LessThanOrEqual
	GreaterThan
	GreaterThanOrEqual
)
|
|
@ -0,0 +1,220 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package parquet
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
"github.com/apache/arrow/go/v12/parquet/file"
|
||||
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
// FileReader reads arrow records from a parquet file and applies the filters
// configured in its read options.
type FileReader struct {
	// reader is the arrow-level parquet reader for the file.
	reader *pqarrow.FileReader
	// options holds the columns to read and the filters to apply.
	options *options.ReadOptions
	// recReader is created lazily on the first call to Read.
	recReader pqarrow.RecordReader
}
|
||||
|
||||
// When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF)
|
||||
func (r *FileReader) Read() (arrow.Record, error) {
|
||||
if r.recReader == nil {
|
||||
// lazy init
|
||||
if err := r.initRecReader(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
rec, err := r.recReader.Read()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return applyFilters(rec, r.options.Filters), nil
|
||||
}
|
||||
|
||||
func applyFilters(rec arrow.Record, filters map[string]filter.Filter) arrow.Record {
|
||||
filterBitSet := bitset.New(uint(rec.NumRows()))
|
||||
for col, f := range filters {
|
||||
colIndices := rec.Schema().FieldIndices(col)
|
||||
if len(colIndices) == 0 {
|
||||
panic("column not found")
|
||||
}
|
||||
colIndex := colIndices[0]
|
||||
arr := rec.Column(colIndex)
|
||||
f.Apply(arr, filterBitSet)
|
||||
}
|
||||
|
||||
if filterBitSet.None() {
|
||||
return rec
|
||||
}
|
||||
|
||||
var cols []arrow.Array
|
||||
for i := 0; i < int(rec.NumCols()); i++ {
|
||||
col := rec.Column(i)
|
||||
switch t := col.(type) {
|
||||
case *array.Int8:
|
||||
builder := array.NewInt8Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Int8Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Uint8:
|
||||
builder := array.NewUint8Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Uint8Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Int16:
|
||||
builder := array.NewInt16Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Int16Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Uint16:
|
||||
builder := array.NewUint16Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Uint16Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Int32:
|
||||
builder := array.NewInt32Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Int32Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Uint32:
|
||||
builder := array.NewUint32Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Uint32Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Int64:
|
||||
builder := array.NewInt64Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Int64Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
case *array.Uint64:
|
||||
builder := array.NewUint64Builder(memory.DefaultAllocator)
|
||||
filtered := filterRecord(t.Uint64Values(), filterBitSet)
|
||||
builder.AppendValues(filtered, nil)
|
||||
cols = append(cols, builder.NewArray())
|
||||
default:
|
||||
panic("unsupported type")
|
||||
}
|
||||
}
|
||||
|
||||
return array.NewRecord(rec.Schema(), cols, int64(cols[0].Len()))
|
||||
}
|
||||
|
||||
// comparableColumnType is the set of column element types filterRecord
// accepts.
type comparableColumnType interface {
	int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64
}
|
||||
|
||||
func filterRecord[T comparableColumnType](targets []T, filterBitSet *bitset.BitSet) []T {
|
||||
var res []T
|
||||
for i := 0; i < int(filterBitSet.Len()); i++ {
|
||||
if !filterBitSet.Test(uint(i)) {
|
||||
res = append(res, targets[i])
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (r *FileReader) initRecReader() error {
|
||||
var (
|
||||
filters map[string]filter.Filter = r.options.Filters
|
||||
columns []string = r.options.Columns
|
||||
)
|
||||
|
||||
var (
|
||||
rowGroupNum int = r.reader.ParquetReader().NumRowGroups()
|
||||
fileMetaData *metadata.FileMetaData = r.reader.ParquetReader().MetaData()
|
||||
)
|
||||
|
||||
var rowGroups []int
|
||||
var colIndices []int
|
||||
// filters check column statistics
|
||||
x1:
|
||||
for i := 0; i < rowGroupNum; i++ {
|
||||
rowGroupMetaData := fileMetaData.RowGroup(i)
|
||||
for col, filter := range filters {
|
||||
if checkColumnStats(rowGroupMetaData, col, filter) {
|
||||
// ignore the row group
|
||||
break x1
|
||||
}
|
||||
}
|
||||
rowGroups = append(rowGroups, i)
|
||||
}
|
||||
|
||||
for _, col := range columns {
|
||||
colIndex := fileMetaData.Schema.Root().FieldIndexByName(col)
|
||||
if colIndex == -1 {
|
||||
panic("column not found")
|
||||
}
|
||||
colIndices = append(colIndices, colIndex)
|
||||
}
|
||||
|
||||
recReader, err := r.reader.GetRecordReader(context.TODO(), colIndices, rowGroups)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.recReader = recReader
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkColumnStats(rowGroupMetaData *metadata.RowGroupMetaData, col string, f filter.Filter) bool {
|
||||
colIndex := rowGroupMetaData.Schema.Root().FieldIndexByName(col)
|
||||
if colIndex == -1 {
|
||||
panic("column not found")
|
||||
}
|
||||
colMetaData, err := rowGroupMetaData.ColumnChunk(colIndex)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
stats, err := colMetaData.Statistics()
|
||||
if err != nil || stats == nil {
|
||||
return false
|
||||
}
|
||||
return f.CheckStatistics(stats)
|
||||
}
|
||||
|
||||
func (r *FileReader) Close() error {
|
||||
if r.recReader != nil {
|
||||
r.recReader.Release()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewFileReader(fs fs.Fs, filePath string, options *options.ReadOptions) (*FileReader, error) {
|
||||
f, err := fs.OpenFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
parquetReader, err := file.NewParquetReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reader, err := pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &FileReader{reader: reader, options: options}, nil
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package parquet
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/parquet"
|
||||
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
)
|
||||
|
||||
// Compile-time check that FileWriter implements format.Writer.
var _ format.Writer = (*FileWriter)(nil)

// FileWriter writes arrow records to a parquet file and tracks how many rows
// have been written.
type FileWriter struct {
	// writer is the arrow-level parquet writer.
	writer *pqarrow.FileWriter
	// count is the total number of rows successfully written.
	count int64
}
|
||||
|
||||
func (f *FileWriter) Write(record arrow.Record) error {
|
||||
if err := f.writer.Write(record); err != nil {
|
||||
return err
|
||||
}
|
||||
f.count += record.NumRows()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Count returns the number of rows written so far.
func (f *FileWriter) Count() int64 {
	return f.count
}
|
||||
|
||||
// Close closes the underlying parquet writer and returns its error.
func (f *FileWriter) Close() error {
	return f.writer.Close()
}
|
||||
|
||||
func NewFileWriter(schema *arrow.Schema, fs fs.Fs, filePath string) (*FileWriter, error) {
|
||||
file, err := fs.OpenFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
w, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &FileWriter{writer: w}, nil
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package format
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
)
|
||||
|
||||
// Reader reads arrow records from a storage format.
type Reader interface {
	// Read returns the next record, or an error.
	Read() (arrow.Record, error)
	// Close releases the resources held by the reader.
	Close() error
}
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package format
|
||||
|
||||
import "github.com/apache/arrow/go/v12/arrow"
|
||||
|
||||
// Writer writes arrow records to a storage format.
type Writer interface {
	// Write appends record to the output.
	Write(record arrow.Record) error
	// Count returns the number of rows written so far.
	Count() int64
	// Close finishes the output and releases the writer's resources.
	Close() error
}
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
type Factory struct{}
|
||||
|
||||
func (f *Factory) Create(fsType options.FsType, uri *url.URL) (Fs, error) {
|
||||
switch fsType {
|
||||
case options.InMemory:
|
||||
return NewMemoryFs(), nil
|
||||
case options.LocalFS:
|
||||
return NewLocalFs(uri), nil
|
||||
case options.S3:
|
||||
return NewMinioFs(uri)
|
||||
default:
|
||||
panic("unknown fs type")
|
||||
}
|
||||
}
|
||||
|
||||
// NewFsFactory returns a new Factory.
func NewFsFactory() *Factory {
	return &Factory{}
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package file
|
||||
|
||||
import "io"
|
||||
|
||||
// File is the set of I/O operations every file-system backend's file handle
// provides: sequential and positional reads, writes, seeking and closing.
type File interface {
	io.Writer
	io.ReaderAt
	io.Seeker
	io.Reader
	io.Closer
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package file
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
var EOF = io.EOF
|
||||
|
||||
// LocalFile is a File backed by an *os.File.
//
// The handle is held by pointer. Earlier versions stored a copy of the
// os.File struct, which required dereferencing the constructor argument (a
// nil argument panicked) and duplicated a value that is meant to be shared.
type LocalFile struct {
	file *os.File
}

// Read reads from the file at the current offset.
func (l *LocalFile) Read(p []byte) (n int, err error) {
	return l.file.Read(p)
}

// Write writes p to the file at the current offset.
func (l *LocalFile) Write(p []byte) (n int, err error) {
	return l.file.Write(p)
}

// ReadAt reads from the file starting at byte offset off.
func (l *LocalFile) ReadAt(p []byte, off int64) (n int, err error) {
	return l.file.ReadAt(p, off)
}

// Seek sets the offset for the next Read or Write on the file.
func (l *LocalFile) Seek(offset int64, whence int) (int64, error) {
	return l.file.Seek(offset, whence)
}

// Close closes the underlying file.
func (l *LocalFile) Close() error {
	return l.file.Close()
}

// NewLocalFile wraps f in a LocalFile. The LocalFile uses f directly, so f
// must not be closed by the caller while the LocalFile is in use. With a nil
// f, the methods return the errors os.File reports for a nil handle instead
// of panicking at construction.
func NewLocalFile(f *os.File) *LocalFile {
	return &LocalFile{file: f}
}
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package file
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
)
|
||||
|
||||
// errInvalid is returned for negative or out-of-range offsets and for an
// unknown seek whence.
var errInvalid = errors.New("invalid argument")

// MemoryFile is an in-memory File: a byte slice plus a cursor used by Read,
// Write and Seek.
type MemoryFile struct {
	// b holds the file contents.
	b []byte
	// i is the cursor for sequential Read/Write.
	i int
}

// Close is a no-op and always returns nil.
func (f *MemoryFile) Close() error {
	return nil
}

// Read copies bytes from the cursor position into p and advances the cursor.
// It returns io.EOF when the cursor is at or beyond the end of the contents.
func (f *MemoryFile) Read(p []byte) (n int, err error) {
	if f.i >= len(f.b) {
		return 0, io.EOF
	}
	n = copy(p, f.b[f.i:])
	f.i += n
	return n, nil
}

// Write stores b at the cursor position, overwriting existing bytes and
// growing the contents as needed, then advances the cursor.
func (f *MemoryFile) Write(b []byte) (int, error) {
	n, err := f.writeAt(b, int64(f.i))
	f.i += n
	return n, err
}

// writeAt stores b at offset off. If off lies beyond the current end, the gap
// is filled with zero bytes first.
func (f *MemoryFile) writeAt(b []byte, off int64) (int, error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		if err := f.truncate(off); err != nil {
			return 0, err
		}
	}
	// Overwrite what fits in the existing contents, append the remainder.
	n := copy(f.b[off:], b)
	f.b = append(f.b, b[n:]...)
	return len(b), nil
}

// truncate resizes the contents to n bytes, zero-filling when growing.
func (f *MemoryFile) truncate(n int64) error {
	switch {
	case n < 0 || int64(int(n)) < n:
		return errInvalid
	case n <= int64(len(f.b)):
		f.b = f.b[:n]
		return nil
	default:
		f.b = append(f.b, make([]byte, int(n)-len(f.b))...)
		return nil
	}
}

// ReadAt copies bytes starting at offset off into b. It returns io.EOF when
// fewer than len(b) bytes are available.
//
// ReadAt does not move the cursor used by Read/Write/Seek, as the io.ReaderAt
// contract requires. Earlier versions advanced the cursor here, which
// corrupted the position of interleaved sequential reads and writes.
func (f *MemoryFile) ReadAt(b []byte, off int64) (n int, err error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		return 0, io.EOF
	}
	n = copy(b, f.b[off:])
	if n < len(b) {
		return n, io.EOF
	}
	return n, nil
}

// Seek moves the cursor relative to the start, the current position or the
// end of the contents, and returns the new absolute position. Positions past
// the end are allowed; negative positions and unknown whence values yield
// errInvalid, as do positions that do not fit in an int.
func (f *MemoryFile) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	case io.SeekStart:
		abs = offset
	case io.SeekCurrent:
		abs = int64(f.i) + offset
	case io.SeekEnd:
		abs = int64(len(f.b)) + offset
	default:
		return 0, errInvalid
	}
	if abs < 0 || int64(int(abs)) < abs {
		return 0, errInvalid
	}
	f.i = int(abs)
	return abs, nil
}

// Bytes returns the underlying contents without copying.
func (f *MemoryFile) Bytes() []byte {
	return f.b
}

// NewMemoryFile returns a MemoryFile whose contents are b, with the cursor at
// the start. b is used directly, not copied.
func NewMemoryFile(b []byte) *MemoryFile {
	return &MemoryFile{
		b: b,
	}
}
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package file
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
|
||||
"github.com/minio/minio-go/v7"
|
||||
)
|
||||
|
||||
// Compile-time check that MinioFile implements File.
var _ File = (*MinioFile)(nil)

// MinioFile is a File backed by an object in a MinIO/S3 bucket. Reads go to
// the embedded object; writes are buffered in memory and uploaded on Close.
type MinioFile struct {
	// Object is the remote object used for reading. NewMinioFile leaves it
	// nil when the object does not exist yet.
	*minio.Object
	// writer buffers the bytes written until Close uploads them.
	writer *MemoryFile
	// client is the MinIO client used for the upload.
	client *minio.Client
	// fileName is the object key.
	fileName string
	// bucketName is the bucket that holds the object.
	bucketName string
}
|
||||
|
||||
// Write appends b to the in-memory write buffer. Nothing is sent to the
// server until Close.
func (f *MinioFile) Write(b []byte) (int, error) {
	return f.writer.Write(b)
}
|
||||
|
||||
func (f *MinioFile) Close() error {
|
||||
if len(f.writer.b) == 0 {
|
||||
return nil
|
||||
}
|
||||
_, err := f.client.PutObject(context.TODO(), f.bucketName, f.fileName, bytes.NewReader(f.writer.b), int64(len(f.writer.b)), minio.PutObjectOptions{})
|
||||
return err
|
||||
}
|
||||
|
||||
func NewMinioFile(client *minio.Client, fileName string, bucketName string) (*MinioFile, error) {
|
||||
_, err := client.StatObject(context.TODO(), bucketName, fileName, minio.StatObjectOptions{})
|
||||
if err != nil {
|
||||
eresp := minio.ToErrorResponse(err)
|
||||
if eresp.Code != "NoSuchKey" {
|
||||
return nil, err
|
||||
}
|
||||
return &MinioFile{
|
||||
writer: NewMemoryFile(nil),
|
||||
client: client,
|
||||
fileName: fileName,
|
||||
bucketName: bucketName,
|
||||
}, nil
|
||||
}
|
||||
|
||||
object, err := client.GetObject(context.TODO(), bucketName, fileName, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &MinioFile{
|
||||
Object: object,
|
||||
writer: NewMemoryFile(nil),
|
||||
client: client,
|
||||
fileName: fileName,
|
||||
bucketName: bucketName,
|
||||
}, nil
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||
)
|
||||
|
||||
// Fs is the file-system abstraction the storage layer is written against.
type Fs interface {
	// OpenFile opens the file at path for reading and writing.
	OpenFile(path string) (file.File, error)
	// Rename moves src to dst.
	Rename(src string, dst string) error
	// DeleteFile removes the file at path.
	DeleteFile(path string) error
	// CreateDir creates the directory at path.
	CreateDir(path string) error
	// List returns the entries found under path.
	List(path string) ([]FileEntry, error)
	// ReadFile returns the whole content of the file at path.
	ReadFile(path string) ([]byte, error)
	// Exist reports whether path exists.
	Exist(path string) (bool, error)
	// Path returns the root path of the file system.
	Path() string
	// MkdirAll creates dir and any missing parents; i is the permission
	// mode passed to the backend.
	MkdirAll(dir string, i int) error
}
|
||||
// FileEntry describes one entry returned by Fs.List.
type FileEntry struct {
	// Path is the path of the entry.
	Path string
}
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
var ErrInvalidFsType = errors.New("invalid fs type")
|
||||
|
||||
func BuildFileSystem(uri string) (Fs, error) {
|
||||
parsedURI, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build file system with uri %s: %w", uri, err)
|
||||
}
|
||||
switch parsedURI.Scheme {
|
||||
case "file":
|
||||
return NewFsFactory().Create(options.LocalFS, parsedURI)
|
||||
case "s3":
|
||||
return NewFsFactory().Create(options.S3, parsedURI)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("build file system with uri %s: %w", uri, ErrInvalidFsType)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||
)
|
||||
|
||||
// LocalFS is an Fs backed by the local operating-system file system.
type LocalFS struct {
	// path is the root path, taken from the URI the LocalFS was built from.
	path string
}
|
||||
|
||||
// MkdirAll creates dir and any missing parents, using i as the permission
// bits.
func (l *LocalFS) MkdirAll(dir string, i int) error {
	return os.MkdirAll(dir, os.FileMode(i))
}
|
||||
|
||||
func (l *LocalFS) OpenFile(path string) (file.File, error) {
|
||||
// Extract the directory from the path
|
||||
dir := filepath.Dir(path)
|
||||
// Create the directory (including all necessary parent directories)
|
||||
err := os.MkdirAll(dir, os.ModePerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
open, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return file.NewLocalFile(open), nil
|
||||
}
|
||||
|
||||
// Rename renames (moves) src to dst using os.Rename. If dst already exists
// and is not a directory, Rename replaces it.
func (l *LocalFS) Rename(src string, dst string) error {
	return os.Rename(src, dst)
}
|
||||
|
||||
func (l *LocalFS) DeleteFile(path string) error {
|
||||
return os.Remove(path)
|
||||
}
|
||||
|
||||
func (l *LocalFS) CreateDir(path string) error {
|
||||
err := os.MkdirAll(path, os.ModePerm)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
log.Error(err.Error())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *LocalFS) List(path string) ([]FileEntry, error) {
|
||||
entries, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret := make([]FileEntry, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
ret = append(ret, FileEntry{Path: filepath.Join(path, entry.Name())})
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (l *LocalFS) ReadFile(path string) ([]byte, error) {
|
||||
return os.ReadFile(path)
|
||||
}
|
||||
|
||||
func (l *LocalFS) Exist(path string) (bool, error) {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (l *LocalFS) Path() string {
|
||||
return l.path
|
||||
}
|
||||
|
||||
func NewLocalFs(uri *url.URL) *LocalFS {
|
||||
return &LocalFS{uri.Path}
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||
)
|
||||
|
||||
type MemoryFs struct {
|
||||
files map[string]*file.MemoryFile
|
||||
}
|
||||
|
||||
func (m *MemoryFs) MkdirAll(dir string, i int) error {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m *MemoryFs) List(path string) ([]FileEntry, error) {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m *MemoryFs) OpenFile(path string) (file.File, error) {
|
||||
if f, ok := m.files[path]; ok {
|
||||
return file.NewMemoryFile(f.Bytes()), nil
|
||||
}
|
||||
f := file.NewMemoryFile(nil)
|
||||
m.files[path] = f
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func (m *MemoryFs) Rename(path string, path2 string) error {
|
||||
if _, ok := m.files[path]; !ok {
|
||||
return nil
|
||||
}
|
||||
m.files[path2] = m.files[path]
|
||||
delete(m.files, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MemoryFs) DeleteFile(path string) error {
|
||||
delete(m.files, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MemoryFs) CreateDir(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MemoryFs) ReadFile(path string) ([]byte, error) {
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m *MemoryFs) Exist(path string) (bool, error) {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (m *MemoryFs) Path() string {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func NewMemoryFs() *MemoryFs {
|
||||
return &MemoryFs{
|
||||
files: make(map[string]*file.MemoryFile),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/minio/minio-go/v7"
|
||||
"github.com/minio/minio-go/v7/pkg/credentials"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||
)
|
||||
|
||||
type MinioFs struct {
|
||||
client *minio.Client
|
||||
bucketName string
|
||||
path string
|
||||
}
|
||||
|
||||
func (fs *MinioFs) MkdirAll(dir string, i int) error {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (fs *MinioFs) OpenFile(path string) (file.File, error) {
|
||||
err, bucket, path := getRealPath(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return file.NewMinioFile(fs.client, path, bucket)
|
||||
}
|
||||
|
||||
func (fs *MinioFs) Rename(src string, dst string) error {
|
||||
err, dstBucket, dst := getRealPath(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err, srcBucket, src := getRealPath(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = fs.client.CopyObject(context.TODO(), minio.CopyDestOptions{Bucket: dstBucket, Object: dst}, minio.CopySrcOptions{Bucket: srcBucket, Object: src})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = fs.client.RemoveObject(context.TODO(), srcBucket, src, minio.RemoveObjectOptions{})
|
||||
if err != nil {
|
||||
log.Warn("failed to remove source object", log.String("source", src))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fs *MinioFs) DeleteFile(path string) error {
|
||||
err, bucket, path := getRealPath(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fs.client.RemoveObject(context.TODO(), bucket, path, minio.RemoveObjectOptions{})
|
||||
}
|
||||
|
||||
func (fs *MinioFs) CreateDir(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fs *MinioFs) List(prefix string) ([]FileEntry, error) {
|
||||
err, bucket, prefix := getRealPath(prefix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ret := make([]FileEntry, 0)
|
||||
for objInfo := range fs.client.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
|
||||
if objInfo.Err != nil {
|
||||
log.Warn("list object error", zap.Error(objInfo.Err))
|
||||
return nil, objInfo.Err
|
||||
}
|
||||
ret = append(ret, FileEntry{Path: path.Join(bucket, objInfo.Key)})
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func (fs *MinioFs) ReadFile(path string) ([]byte, error) {
|
||||
err, bucket, path := getRealPath(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
obj, err := fs.client.GetObject(context.TODO(), bucket, path, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stat, err := obj.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := make([]byte, stat.Size)
|
||||
n, err := obj.Read(buf)
|
||||
if err != nil && err != io.EOF {
|
||||
return nil, err
|
||||
}
|
||||
if n != int(stat.Size) {
|
||||
return nil, fmt.Errorf("failed to read full file, expect: %d, actual: %d", stat.Size, n)
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func (fs *MinioFs) Exist(path string) (bool, error) {
|
||||
err, bucket, path := getRealPath(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
_, err = fs.client.StatObject(context.TODO(), bucket, path, minio.StatObjectOptions{})
|
||||
if err != nil {
|
||||
resp := minio.ToErrorResponse(err)
|
||||
if resp.Code == "NoSuchKey" {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (fs *MinioFs) Path() string {
|
||||
return path.Join(fs.bucketName, strings.TrimPrefix(fs.path, "/"))
|
||||
}
|
||||
|
||||
// uri should be s3://username:password@bucket/path?endpoint_override=localhost%3A9000
|
||||
func NewMinioFs(uri *url.URL) (*MinioFs, error) {
|
||||
accessKey := uri.User.Username()
|
||||
secretAccessKey, set := uri.User.Password()
|
||||
if !set {
|
||||
log.Warn("secret access key not set")
|
||||
}
|
||||
|
||||
endpoints, ok := uri.Query()[constant.EndpointOverride]
|
||||
if !ok || len(endpoints) == 0 {
|
||||
return nil, errors.ErrNoEndpoint
|
||||
}
|
||||
|
||||
cli, err := minio.New(endpoints[0], &minio.Options{
|
||||
BucketLookup: minio.BucketLookupAuto,
|
||||
Creds: credentials.NewStaticV4(accessKey, secretAccessKey, ""),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bucket := uri.Host
|
||||
path := uri.Path
|
||||
|
||||
log.Info("minio fs infos", zap.String("endpoint", endpoints[0]), zap.String("bucket", bucket), zap.String("path", path))
|
||||
|
||||
exist, err := cli.BucketExists(context.TODO(), bucket)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !exist {
|
||||
if err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &MinioFs{
|
||||
client: cli,
|
||||
bucketName: bucket,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// getRealPath splits "<bucket>/<object key>" at the first '/' and returns the
// bucket and the object key.
//
// It returns an error, with empty strings, when path starts with '/' or
// contains no '/' at all. The error is the first return value.
func getRealPath(path string) (error, string, string) {
	if strings.HasPrefix(path, "/") {
		return fmt.Errorf("Invalid path, %s should not start with '/'", path), "", ""
	}
	sep := strings.IndexByte(path, '/')
	if sep < 0 {
		return fmt.Errorf("Invalid path, %s should contains at least one '/'", path), "", ""
	}
	return nil, path[:sep], path[sep+1:]
}
|
|
@ -0,0 +1,95 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef ARROW_C_DATA_INTERFACE
|
||||
#define ARROW_C_DATA_INTERFACE
|
||||
|
||||
#define ARROW_FLAG_DICTIONARY_ORDERED 1
|
||||
#define ARROW_FLAG_NULLABLE 2
|
||||
#define ARROW_FLAG_MAP_KEYS_SORTED 4
|
||||
|
||||
struct ArrowSchema {
    // Description of the array type
    const char* format;
    const char* name;
    const char* metadata;
    int64_t flags;
    int64_t n_children;
    struct ArrowSchema** children;
    struct ArrowSchema* dictionary;

    // Callback that releases this schema
    void (*release)(struct ArrowSchema*);
    // Opaque data owned by the producer
    void* private_data;
};
|
||||
|
||||
struct ArrowArray {
    // Description of the array data
    int64_t length;
    int64_t null_count;
    int64_t offset;
    int64_t n_buffers;
    int64_t n_children;
    const void** buffers;
    struct ArrowArray** children;
    struct ArrowArray* dictionary;

    // Callback that releases this array
    void (*release)(struct ArrowArray*);
    // Opaque data owned by the producer
    void* private_data;
};
|
||||
|
||||
#endif // ARROW_C_DATA_INTERFACE
|
||||
|
||||
#ifndef ARROW_C_STREAM_INTERFACE
|
||||
#define ARROW_C_STREAM_INTERFACE
|
||||
|
||||
struct ArrowArrayStream {
    // Fetch the stream's type, which is shared by every array it yields.
    //
    // Returns 0 on success, otherwise an `errno`-compatible error code.
    //
    // On success the caller owns `out` and must release it separately from
    // the stream.
    int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);

    // Fetch the next array. The stream has ended when the call succeeds and
    // the returned array is released.
    //
    // Returns 0 on success, otherwise an `errno`-compatible error code.
    //
    // On success the caller owns `out` and must release it separately from
    // the stream.
    int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

    // Fetch optional details about the most recent failure. Only call this
    // when the last stream operation returned a non-0 code.
    //
    // Returns a null-terminated description of the last error, or NULL when
    // none is available.
    //
    // The pointer stays valid only until the next operation on this stream,
    // release included.
    const char* (*get_last_error)(struct ArrowArrayStream*);

    // Release the resources owned by the stream itself. Arrays obtained from
    // `get_next` must each be released on their own.
    void (*release)(struct ArrowArrayStream*);

    // Opaque data owned by the producer
    void* private_data;
};
|
||||
|
||||
#endif // ARROW_C_STREAM_INTERFACE
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,115 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "arrow/c/abi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// Query whether the C schema is released
|
||||
static inline int
|
||||
ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
|
||||
return schema->release == NULL;
|
||||
}
|
||||
|
||||
/// Mark the C schema released (for use in release callbacks)
|
||||
static inline void
|
||||
ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
|
||||
schema->release = NULL;
|
||||
}
|
||||
|
||||
/// Move the C schema from `src` to `dest`
|
||||
///
|
||||
/// Note `dest` must *not* point to a valid schema already, otherwise there
|
||||
/// will be a memory leak.
|
||||
static inline void
|
||||
ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
|
||||
assert(dest != src);
|
||||
assert(!ArrowSchemaIsReleased(src));
|
||||
memcpy(dest, src, sizeof(struct ArrowSchema));
|
||||
ArrowSchemaMarkReleased(src);
|
||||
}
|
||||
|
||||
/// Release the C schema, if necessary, by calling its release callback
|
||||
static inline void
|
||||
ArrowSchemaRelease(struct ArrowSchema* schema) {
|
||||
if (!ArrowSchemaIsReleased(schema)) {
|
||||
schema->release(schema);
|
||||
assert(ArrowSchemaIsReleased(schema));
|
||||
}
|
||||
}
|
||||
|
||||
/// Query whether the C array is released
|
||||
static inline int
|
||||
ArrowArrayIsReleased(const struct ArrowArray* array) {
|
||||
return array->release == NULL;
|
||||
}
|
||||
|
||||
/// Mark the C array released (for use in release callbacks)
|
||||
static inline void
|
||||
ArrowArrayMarkReleased(struct ArrowArray* array) {
|
||||
array->release = NULL;
|
||||
}
|
||||
|
||||
/// Move the C array from `src` to `dest`
|
||||
///
|
||||
/// Note `dest` must *not* point to a valid array already, otherwise there
|
||||
/// will be a memory leak.
|
||||
static inline void
|
||||
ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
|
||||
assert(dest != src);
|
||||
assert(!ArrowArrayIsReleased(src));
|
||||
memcpy(dest, src, sizeof(struct ArrowArray));
|
||||
ArrowArrayMarkReleased(src);
|
||||
}
|
||||
|
||||
/// Release the C array, if necessary, by calling its release callback
|
||||
static inline void
|
||||
ArrowArrayRelease(struct ArrowArray* array) {
|
||||
if (!ArrowArrayIsReleased(array)) {
|
||||
array->release(array);
|
||||
assert(ArrowArrayIsReleased(array));
|
||||
}
|
||||
}
|
||||
|
||||
/// Query whether the C array stream is released
|
||||
static inline int
|
||||
ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
|
||||
return stream->release == NULL;
|
||||
}
|
||||
|
||||
/// Mark the C array stream released (for use in release callbacks)
|
||||
static inline void
|
||||
ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
|
||||
stream->release = NULL;
|
||||
}
|
||||
|
||||
/// Move the C array stream from `src` to `dest`
|
||||
///
|
||||
/// Note `dest` must *not* point to a valid stream already, otherwise there
|
||||
/// will be a memory leak.
|
||||
static inline void
|
||||
ArrowArrayStreamMove(struct ArrowArrayStream* src,
|
||||
struct ArrowArrayStream* dest) {
|
||||
assert(dest != src);
|
||||
assert(!ArrowArrayStreamIsReleased(src));
|
||||
memcpy(dest, src, sizeof(struct ArrowArrayStream));
|
||||
ArrowArrayStreamMarkReleased(src);
|
||||
}
|
||||
|
||||
/// Release the C array stream, if necessary, by calling its release callback
|
||||
static inline void
|
||||
ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
|
||||
if (!ArrowArrayStreamIsReleased(stream)) {
|
||||
stream->release(stream);
|
||||
assert(ArrowArrayStreamIsReleased(stream));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package packed
|
||||
|
||||
/*
|
||||
#cgo pkg-config: milvus_core
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "segcore/packed_reader_c.h"
|
||||
#include "arrow/c/abi.h"
|
||||
#include "arrow/c/helpers.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||
"github.com/cockroachdb/errors"
|
||||
)
|
||||
|
||||
func NewPackedReader(path string, schema *arrow.Schema, bufferSize int) (*PackedReader, error) {
|
||||
var cas cdata.CArrowSchema
|
||||
cdata.ExportArrowSchema(schema, &cas)
|
||||
cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))
|
||||
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
|
||||
cBufferSize := C.int64_t(bufferSize)
|
||||
|
||||
var cPackedReader C.CPackedReader
|
||||
status := C.NewPackedReader(cPath, cSchema, cBufferSize, &cPackedReader)
|
||||
if status != 0 {
|
||||
return nil, fmt.Errorf("failed to new packed reader: %s, status: %d", path, status)
|
||||
}
|
||||
return &PackedReader{cPackedReader: cPackedReader, schema: schema}, nil
|
||||
}
|
||||
|
||||
func (pr *PackedReader) ReadNext() (arrow.Record, error) {
|
||||
var cArr C.CArrowArray
|
||||
var cSchema C.CArrowSchema
|
||||
status := C.ReadNext(pr.cPackedReader, &cArr, &cSchema)
|
||||
if status != 0 {
|
||||
return nil, fmt.Errorf("ReadNext failed with error code %d", status)
|
||||
}
|
||||
|
||||
if cArr == nil {
|
||||
return nil, nil // end of stream, no more records to read
|
||||
}
|
||||
|
||||
// Convert ArrowArray to Go RecordBatch using cdata
|
||||
goCArr := (*cdata.CArrowArray)(unsafe.Pointer(cArr))
|
||||
goCSchema := (*cdata.CArrowSchema)(unsafe.Pointer(cSchema))
|
||||
recordBatch, err := cdata.ImportCRecordBatch(goCArr, goCSchema)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert ArrowArray to Record: %w", err)
|
||||
}
|
||||
|
||||
// Return the RecordBatch as an arrow.Record
|
||||
return recordBatch, nil
|
||||
}
|
||||
|
||||
func (pr *PackedReader) Close() error {
|
||||
status := C.CloseReader(pr.cPackedReader)
|
||||
if status != 0 {
|
||||
return errors.New("PackedReader: failed to close file")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package packed
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
"github.com/stretchr/testify/suite"
|
||||
"golang.org/x/exp/rand"
|
||||
)
|
||||
|
||||
func TestPackedReadAndWrite(t *testing.T) {
|
||||
suite.Run(t, new(PackedTestSuite))
|
||||
}
|
||||
|
||||
type PackedTestSuite struct {
|
||||
suite.Suite
|
||||
schema *arrow.Schema
|
||||
rec arrow.Record
|
||||
}
|
||||
|
||||
func (suite *PackedTestSuite) SetupTest() {
|
||||
schema := arrow.NewSchema([]arrow.Field{
|
||||
{Name: "a", Type: arrow.PrimitiveTypes.Int32},
|
||||
{Name: "b", Type: arrow.PrimitiveTypes.Int64},
|
||||
{Name: "c", Type: arrow.BinaryTypes.String},
|
||||
}, nil)
|
||||
suite.schema = schema
|
||||
|
||||
b := array.NewRecordBuilder(memory.DefaultAllocator, schema)
|
||||
defer b.Release()
|
||||
for idx := range schema.Fields() {
|
||||
switch idx {
|
||||
case 0:
|
||||
b.Field(idx).(*array.Int32Builder).AppendValues(
|
||||
[]int32{int32(1), int32(2), int32(3)}, nil,
|
||||
)
|
||||
case 1:
|
||||
b.Field(idx).(*array.Int64Builder).AppendValues(
|
||||
[]int64{int64(4), int64(5), int64(6)}, nil,
|
||||
)
|
||||
case 2:
|
||||
b.Field(idx).(*array.StringBuilder).AppendValues(
|
||||
[]string{"a", "b", "c"}, nil,
|
||||
)
|
||||
}
|
||||
}
|
||||
rec := b.NewRecord()
|
||||
suite.rec = rec
|
||||
}
|
||||
|
||||
func (suite *PackedTestSuite) TestPackedOneFile() {
|
||||
batches := 100
|
||||
|
||||
path := "/tmp"
|
||||
bufferSize := 10 * 1024 * 1024 // 10MB
|
||||
pw, err := NewPackedWriter(path, suite.schema, bufferSize)
|
||||
suite.NoError(err)
|
||||
for i := 0; i < batches; i++ {
|
||||
err = pw.WriteRecordBatch(suite.rec)
|
||||
suite.NoError(err)
|
||||
}
|
||||
err = pw.Close()
|
||||
suite.NoError(err)
|
||||
|
||||
reader, err := NewPackedReader(path, suite.schema, bufferSize)
|
||||
suite.NoError(err)
|
||||
rr, err := reader.ReadNext()
|
||||
suite.NoError(err)
|
||||
defer rr.Release()
|
||||
suite.Equal(int64(3*batches), rr.NumRows())
|
||||
}
|
||||
|
||||
func (suite *PackedTestSuite) TestPackedMultiFiles() {
|
||||
batches := 1000
|
||||
|
||||
b := array.NewRecordBuilder(memory.DefaultAllocator, suite.schema)
|
||||
strLen := 1000
|
||||
arrLen := 30
|
||||
defer b.Release()
|
||||
for idx := range suite.schema.Fields() {
|
||||
switch idx {
|
||||
case 0:
|
||||
values := make([]int32, arrLen)
|
||||
for i := 0; i < arrLen; i++ {
|
||||
values[i] = int32(i + 1)
|
||||
}
|
||||
b.Field(idx).(*array.Int32Builder).AppendValues(values, nil)
|
||||
case 1:
|
||||
values := make([]int64, arrLen)
|
||||
for i := 0; i < arrLen; i++ {
|
||||
values[i] = int64(i + 1)
|
||||
}
|
||||
b.Field(idx).(*array.Int64Builder).AppendValues(values, nil)
|
||||
case 2:
|
||||
values := make([]string, arrLen)
|
||||
for i := 0; i < arrLen; i++ {
|
||||
values[i] = randomString(strLen)
|
||||
}
|
||||
b.Field(idx).(*array.StringBuilder).AppendValues(values, nil)
|
||||
}
|
||||
}
|
||||
rec := b.NewRecord()
|
||||
defer rec.Release()
|
||||
path := "/tmp"
|
||||
bufferSize := 10 * 1024 * 1024 // 10MB
|
||||
pw, err := NewPackedWriter(path, suite.schema, bufferSize)
|
||||
suite.NoError(err)
|
||||
for i := 0; i < batches; i++ {
|
||||
err = pw.WriteRecordBatch(rec)
|
||||
suite.NoError(err)
|
||||
}
|
||||
err = pw.Close()
|
||||
suite.NoError(err)
|
||||
|
||||
reader, err := NewPackedReader(path, suite.schema, bufferSize)
|
||||
suite.NoError(err)
|
||||
var rows int64 = 0
|
||||
var rr arrow.Record
|
||||
for {
|
||||
rr, err = reader.ReadNext()
|
||||
suite.NoError(err)
|
||||
if rr == nil {
|
||||
// end of file
|
||||
break
|
||||
}
|
||||
|
||||
rows += rr.NumRows()
|
||||
}
|
||||
|
||||
suite.Equal(int64(arrLen*batches), rows)
|
||||
}
|
||||
|
||||
// randomString returns a string of the given length whose characters are
// drawn at random from digits and ASCII letters.
func randomString(length int) string {
	const charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	buf := make([]byte, 0, length)
	for len(buf) < length {
		buf = append(buf, charset[rand.Intn(len(charset))])
	}
	return string(buf)
}
|
|
@ -0,0 +1,77 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package packed
|
||||
|
||||
/*
|
||||
#cgo pkg-config: milvus_core
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "segcore/packed_writer_c.h"
|
||||
#include "arrow/c/abi.h"
|
||||
#include "arrow/c/helpers.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||
"github.com/cockroachdb/errors"
|
||||
)
|
||||
|
||||
func NewPackedWriter(path string, schema *arrow.Schema, bufferSize int) (*PackedWriter, error) {
|
||||
var cas cdata.CArrowSchema
|
||||
cdata.ExportArrowSchema(schema, &cas)
|
||||
cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))
|
||||
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
|
||||
cBufferSize := C.int64_t(bufferSize)
|
||||
|
||||
var cPackedWriter C.CPackedWriter
|
||||
status := C.NewPackedWriter(cPath, cSchema, cBufferSize, &cPackedWriter)
|
||||
if status != 0 {
|
||||
return nil, fmt.Errorf("failed to new packed writer: %s, status: %d", path, status)
|
||||
}
|
||||
return &PackedWriter{cPackedWriter: cPackedWriter}, nil
|
||||
}
|
||||
|
||||
func (pw *PackedWriter) WriteRecordBatch(recordBatch arrow.Record) error {
|
||||
var caa cdata.CArrowArray
|
||||
var cas cdata.CArrowSchema
|
||||
|
||||
cdata.ExportArrowRecordBatch(recordBatch, &caa, &cas)
|
||||
|
||||
cArr := (*C.struct_ArrowArray)(unsafe.Pointer(&caa))
|
||||
cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))
|
||||
|
||||
status := C.WriteRecordBatch(pw.cPackedWriter, cArr, cSchema)
|
||||
if status != 0 {
|
||||
return errors.New("PackedWriter: failed to write record batch")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pw *PackedWriter) Close() error {
|
||||
status := C.CloseWriter(pw.cPackedWriter)
|
||||
if status != 0 {
|
||||
return errors.New("PackedWriter: failed to close file")
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package packed
|
||||
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include "arrow/c/abi.h"
|
||||
#include "arrow/c/helpers.h"
|
||||
#include "segcore/packed_reader_c.h"
|
||||
#include "segcore/packed_writer_c.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||
)
|
||||
|
||||
type PackedWriter struct {
|
||||
cPackedWriter C.CPackedWriter
|
||||
}
|
||||
|
||||
type PackedReader struct {
|
||||
cPackedReader C.CPackedReader
|
||||
arr *cdata.CArrowArray
|
||||
schema *arrow.Schema
|
||||
}
|
||||
|
||||
type (
|
||||
// CArrowSchema is the C Data Interface for ArrowSchemas
|
||||
CArrowSchema = C.struct_ArrowSchema
|
||||
// CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h
|
||||
CArrowArray = C.struct_ArrowArray
|
||||
)
|
|
@ -0,0 +1,65 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package commonreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type DeleteReader struct {
|
||||
recordReader array.RecordReader
|
||||
schemaOptions *schema.SchemaOptions
|
||||
deleteFragments fragment.DeleteFragmentVector
|
||||
options *options.ReadOptions
|
||||
}
|
||||
|
||||
func (d DeleteReader) Retain() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (d DeleteReader) Release() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (d DeleteReader) Schema() *arrow.Schema {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (d DeleteReader) Next() bool {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (d DeleteReader) Record() arrow.Record {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (d DeleteReader) Err() error {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func NewDeleteReader(recordReader array.RecordReader, schemaOptions *schema.SchemaOptions, deleteFragments fragment.DeleteFragmentVector, options *options.ReadOptions) *DeleteReader {
|
||||
return &DeleteReader{recordReader: recordReader, schemaOptions: schemaOptions, deleteFragments: deleteFragments, options: options}
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package commonreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
type FilterReader struct {
|
||||
recordReader array.RecordReader
|
||||
option *options.ReadOptions
|
||||
currentFilteredBatchReader array.RecordReader
|
||||
}
|
||||
|
||||
func (r *FilterReader) Retain() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (r *FilterReader) Release() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (r *FilterReader) Schema() *arrow.Schema {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (r *FilterReader) Record() arrow.Record {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (r *FilterReader) Err() error {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func MakeFilterReader(recordReader array.RecordReader, option *options.ReadOptions) *FilterReader {
|
||||
return &FilterReader{
|
||||
recordReader: recordReader,
|
||||
option: option,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *FilterReader) Next() bool {
|
||||
//for {
|
||||
// if r.currentFilteredBatchReader != nil {
|
||||
// filteredBatch := r.currentFilteredBatchReader.Next()
|
||||
// if err != nil {
|
||||
// return false
|
||||
// }
|
||||
// if filteredBatch == nil {
|
||||
// r.currentFilteredBatchReader = nil
|
||||
// continue
|
||||
// }
|
||||
// return filteredBatch, nil
|
||||
// }
|
||||
// err := r.NextFilteredBatchReader()
|
||||
// if err != nil {
|
||||
// return nil
|
||||
// }
|
||||
// if r.currentFilteredBatchReader == nil {
|
||||
// return nil
|
||||
// }
|
||||
//}
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package commonreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
type ProjectionReader struct {
|
||||
array.RecordReader
|
||||
reader array.RecordReader
|
||||
options *options.ReadOptions
|
||||
schema *arrow.Schema
|
||||
}
|
||||
|
||||
func NewProjectionReader(reader array.RecordReader, options *options.ReadOptions, schema *arrow.Schema) array.RecordReader {
|
||||
projectionSchema := utils.ProjectSchema(schema, options.Columns)
|
||||
return &ProjectionReader{reader: reader, options: options, schema: projectionSchema}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package recordreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type FilterQueryRecordReader struct {
|
||||
// TODO implement me
|
||||
ref int64
|
||||
schema *schema.Schema
|
||||
options *options.ReadOptions
|
||||
fs fs.Fs
|
||||
scalarFragment fragment.FragmentVector
|
||||
vectorFragment fragment.FragmentVector
|
||||
deleteFragments fragment.DeleteFragmentVector
|
||||
record arrow.Record
|
||||
}
|
||||
|
||||
func NewFilterQueryReader(
|
||||
s *schema.Schema,
|
||||
options *options.ReadOptions,
|
||||
f fs.Fs,
|
||||
scalarFragment fragment.FragmentVector,
|
||||
vectorFragment fragment.FragmentVector,
|
||||
deleteFragments fragment.DeleteFragmentVector,
|
||||
) array.RecordReader {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package recordreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type MergeRecordReader struct {
|
||||
ref int64
|
||||
schema *schema.Schema
|
||||
options *options.ReadOptions
|
||||
fs fs.Fs
|
||||
scalarFragments fragment.FragmentVector
|
||||
vectorFragments fragment.FragmentVector
|
||||
deleteFragments fragment.DeleteFragmentVector
|
||||
record arrow.Record
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Retain() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Release() {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Schema() *arrow.Schema {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Next() bool {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Record() arrow.Record {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func (m MergeRecordReader) Err() error {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func NewMergeRecordReader(
|
||||
s *schema.Schema,
|
||||
options *options.ReadOptions,
|
||||
f fs.Fs,
|
||||
scalarFragment fragment.FragmentVector,
|
||||
vectorFragment fragment.FragmentVector,
|
||||
deleteFragments fragment.DeleteFragmentVector,
|
||||
) *MergeRecordReader {
|
||||
// TODO implement me
|
||||
panic("implement me")
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package recordreader
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/arrowutil"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
type MultiFilesSequentialReader struct {
|
||||
fs fs.Fs
|
||||
schema *arrow.Schema
|
||||
files []string
|
||||
nextPos int
|
||||
options *options.ReadOptions
|
||||
currReader array.RecordReader
|
||||
err error
|
||||
ref int64
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Retain() {
|
||||
atomic.AddInt64(&m.ref, 1)
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Release() {
|
||||
if atomic.AddInt64(&m.ref, -1) == 0 {
|
||||
if m.currReader != nil {
|
||||
m.currReader.Release()
|
||||
m.currReader = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Schema() *arrow.Schema {
|
||||
return m.schema
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Next() bool {
|
||||
for {
|
||||
if m.currReader == nil {
|
||||
if m.nextPos >= len(m.files) {
|
||||
return false
|
||||
}
|
||||
|
||||
m.nextReader()
|
||||
if m.err != nil {
|
||||
return false
|
||||
}
|
||||
m.nextPos++
|
||||
}
|
||||
if m.currReader.Next() {
|
||||
return true
|
||||
}
|
||||
if m.currReader.Err() != nil {
|
||||
m.err = m.currReader.Err()
|
||||
return false
|
||||
}
|
||||
if m.currReader != nil {
|
||||
m.currReader.Release()
|
||||
m.currReader = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Record() arrow.Record {
|
||||
if m.currReader != nil {
|
||||
return m.currReader.Record()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) Err() error {
|
||||
return m.err
|
||||
}
|
||||
|
||||
func (m *MultiFilesSequentialReader) nextReader() {
|
||||
var fileReader *pqarrow.FileReader
|
||||
fileReader, m.err = arrowutil.MakeArrowFileReader(m.fs, m.files[m.nextPos])
|
||||
if m.err != nil {
|
||||
return
|
||||
}
|
||||
m.currReader, m.err = arrowutil.MakeArrowRecordReader(fileReader, m.options)
|
||||
}
|
||||
|
||||
func NewMultiFilesSequentialReader(fs fs.Fs, fragments fragment.FragmentVector, schema *arrow.Schema, options *options.ReadOptions) *MultiFilesSequentialReader {
|
||||
files := make([]string, 0, len(fragments))
|
||||
for _, f := range fragments {
|
||||
files = append(files, f.Files()...)
|
||||
}
|
||||
|
||||
return &MultiFilesSequentialReader{
|
||||
fs: fs,
|
||||
schema: schema,
|
||||
options: options,
|
||||
files: files,
|
||||
nextPos: 0,
|
||||
ref: 1,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package recordreader
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
func MakeRecordReader(
|
||||
m *manifest.Manifest,
|
||||
s *schema.Schema,
|
||||
f fs.Fs,
|
||||
deleteFragments fragment.DeleteFragmentVector,
|
||||
options *options.ReadOptions,
|
||||
) array.RecordReader {
|
||||
relatedColumns := make([]string, 0)
|
||||
relatedColumns = append(relatedColumns, options.Columns...)
|
||||
|
||||
for _, filter := range options.Filters {
|
||||
relatedColumns = append(relatedColumns, filter.GetColumnName())
|
||||
}
|
||||
|
||||
scalarData := m.GetScalarFragments()
|
||||
vectorData := m.GetVectorFragments()
|
||||
|
||||
onlyScalar := onlyContainScalarColumns(s, relatedColumns)
|
||||
onlyVector := onlyContainVectorColumns(s, relatedColumns)
|
||||
|
||||
if onlyScalar || onlyVector {
|
||||
var dataFragments fragment.FragmentVector
|
||||
if onlyScalar {
|
||||
dataFragments = scalarData
|
||||
} else {
|
||||
dataFragments = vectorData
|
||||
}
|
||||
return NewScanRecordReader(s, options, f, dataFragments, deleteFragments)
|
||||
}
|
||||
if len(options.Filters) > 0 && filtersOnlyContainPKAndVersion(s, options.FiltersV2) {
|
||||
return NewMergeRecordReader(s, options, f, scalarData, vectorData, deleteFragments)
|
||||
}
|
||||
return NewFilterQueryReader(s, options, f, scalarData, vectorData, deleteFragments)
|
||||
}
|
||||
|
||||
func onlyContainVectorColumns(schema *schema.Schema, relatedColumns []string) bool {
|
||||
for _, column := range relatedColumns {
|
||||
if schema.Options().VectorColumn != column && schema.Options().PrimaryColumn != column && schema.Options().VersionColumn != column {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func onlyContainScalarColumns(schema *schema.Schema, relatedColumns []string) bool {
|
||||
for _, column := range relatedColumns {
|
||||
if schema.Options().VectorColumn == column {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func filtersOnlyContainPKAndVersion(s *schema.Schema, filters []filter.Filter) bool {
|
||||
for _, f := range filters {
|
||||
if f.GetColumnName() != s.Options().PrimaryColumn &&
|
||||
f.GetColumnName() != s.Options().VersionColumn {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func MakeScanDeleteReader(manifest *manifest.Manifest, fs fs.Fs) array.RecordReader {
|
||||
return NewMultiFilesSequentialReader(fs, manifest.GetDeleteFragments(), manifest.GetSchema().DeleteSchema(), options.NewReadOptions())
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package recordreader
|
||||
|
||||
import (
|
||||
"io"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/format/parquet"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/reader/commonreader"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type ScanRecordReader struct {
|
||||
ref int64
|
||||
schema *schema.Schema
|
||||
options *options.ReadOptions
|
||||
fs fs.Fs
|
||||
dataFragments fragment.FragmentVector
|
||||
deleteFragments fragment.DeleteFragmentVector
|
||||
rec arrow.Record
|
||||
curReader format.Reader
|
||||
reader array.RecordReader
|
||||
nextPos int
|
||||
err error
|
||||
}
|
||||
|
||||
func NewScanRecordReader(
|
||||
s *schema.Schema,
|
||||
options *options.ReadOptions,
|
||||
f fs.Fs,
|
||||
dataFragments fragment.FragmentVector,
|
||||
deleteFragments fragment.DeleteFragmentVector,
|
||||
) *ScanRecordReader {
|
||||
return &ScanRecordReader{
|
||||
ref: 1,
|
||||
schema: s,
|
||||
options: options,
|
||||
fs: f,
|
||||
dataFragments: dataFragments,
|
||||
deleteFragments: deleteFragments,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Schema() *arrow.Schema {
|
||||
return utils.ProjectSchema(r.schema.Schema(), r.options.OutputColumns())
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Retain() {
|
||||
atomic.AddInt64(&r.ref, 1)
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Release() {
|
||||
if atomic.AddInt64(&r.ref, -1) == 0 {
|
||||
if r.rec != nil {
|
||||
r.rec.Release()
|
||||
r.rec = nil
|
||||
}
|
||||
if r.curReader != nil {
|
||||
r.curReader.Close()
|
||||
r.curReader = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Next() bool {
|
||||
datafiles := fragment.ToFilesVector(r.dataFragments)
|
||||
log.Debug("ScanRecordReader Next", zap.Any("datafiles", datafiles))
|
||||
if r.rec != nil {
|
||||
r.rec.Release()
|
||||
r.rec = nil
|
||||
}
|
||||
for {
|
||||
if r.curReader == nil {
|
||||
if r.nextPos >= len(datafiles) {
|
||||
return false
|
||||
}
|
||||
// FIXME: nil options
|
||||
reader, err := parquet.NewFileReader(r.fs, datafiles[r.nextPos], r.options)
|
||||
if err != nil {
|
||||
r.err = err
|
||||
return false
|
||||
}
|
||||
r.nextPos++
|
||||
r.curReader = reader
|
||||
}
|
||||
|
||||
rec, err := r.curReader.Read()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
r.curReader.Close()
|
||||
r.curReader = nil
|
||||
continue
|
||||
}
|
||||
// if error occurs in the middle of reading, return false
|
||||
r.curReader.Close()
|
||||
r.curReader = nil
|
||||
r.err = err
|
||||
return false
|
||||
}
|
||||
|
||||
if rec.NumRows() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
r.rec = rec
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Record() arrow.Record {
|
||||
return r.rec
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) Err() error {
|
||||
return r.err
|
||||
}
|
||||
|
||||
func (r *ScanRecordReader) MakeInnerReader() array.RecordReader {
|
||||
// TODO implement me
|
||||
reader := NewMultiFilesSequentialReader(r.fs, r.dataFragments, r.Schema(), r.options)
|
||||
|
||||
filterReader := commonreader.MakeFilterReader(reader, r.options)
|
||||
|
||||
deleteReader := commonreader.NewDeleteReader(filterReader, r.schema.Options(), r.deleteFragments, r.options)
|
||||
|
||||
res := commonreader.NewProjectionReader(deleteReader, r.options, r.schema.Schema())
|
||||
return res
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package lock
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
)
|
||||
|
||||
// LockManager guards manifest commits.
type LockManager interface {
	// Acquire takes the lock, waiting until it is available. It returns the
	// manifest version to be modified, or useLatestVersion=true when the
	// newest version should be used instead.
	Acquire() (version int64, useLatestVersion bool, err error)
	// Release gives the lock back. version is the newly allocated manifest
	// version and success tells whether the work done between Acquire and
	// Release succeeded.
	Release(version int64, success bool) error
}
|
||||
|
||||
// EmptyLockManager is a LockManager that performs no locking at all.
type EmptyLockManager struct{}
|
||||
|
||||
func (h *EmptyLockManager) Acquire() (version int64, useLatestVersion bool, err error) {
|
||||
return constant.LatestManifestVersion, true, nil
|
||||
}
|
||||
|
||||
func (h *EmptyLockManager) Release(_ int64, _ bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MemoryLockManager is an in-process LockManager that tracks held locks per
// version in a map.
type MemoryLockManager struct {
	// mu guards locks and nextVersion.
	mu sync.Mutex
	// locks maps a version to whether its lock is currently held.
	locks map[int64]bool
	// nextVersion is the version the next Acquire call hands out.
	nextVersion int64
}
|
||||
|
||||
func NewMemoryLockManager() *MemoryLockManager {
|
||||
return &MemoryLockManager{
|
||||
mu: sync.Mutex{},
|
||||
locks: make(map[int64]bool),
|
||||
nextVersion: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MemoryLockManager) Acquire() (version int64, useLatestVersion bool, err error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
version = m.nextVersion
|
||||
|
||||
if m.locks[version] {
|
||||
log.Warn("lock is already acquired", zap.Int64("version", version))
|
||||
return version, false, errors.New("lock is already acquired")
|
||||
}
|
||||
|
||||
if version == constant.LatestManifestVersion {
|
||||
useLatestVersion = true
|
||||
} else {
|
||||
useLatestVersion = false
|
||||
}
|
||||
m.locks[version] = true
|
||||
log.Info("acquire lock", zap.Int64("version", version), zap.Bool("useLatestVersion", useLatestVersion))
|
||||
|
||||
return version, useLatestVersion, nil
|
||||
}
|
||||
|
||||
func (m *MemoryLockManager) Release(version int64, success bool) error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
realVersion := int64(0)
|
||||
realVersion = version - 1
|
||||
if !m.locks[realVersion] {
|
||||
return errors.New("lock is already released or does not exist")
|
||||
}
|
||||
m.locks[realVersion] = false
|
||||
log.Info("release lock", zap.Int64("version", realVersion), zap.Bool("success", success))
|
||||
if success {
|
||||
m.nextVersion = version
|
||||
} else {
|
||||
m.nextVersion = constant.LatestManifestVersion
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package manifest
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||
)
|
||||
|
||||
type ManifestCommit struct {
|
||||
ops []ManifestCommitOp
|
||||
lock lock.LockManager
|
||||
rw ManifestReaderWriter
|
||||
}
|
||||
|
||||
func (m *ManifestCommit) AddOp(op ...ManifestCommitOp) {
|
||||
m.ops = append(m.ops, op...)
|
||||
}
|
||||
|
||||
func (m ManifestCommit) Commit() (manifest *Manifest, err error) {
|
||||
ver, latest, err := m.lock.Acquire()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var version int64
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if err2 := m.lock.Release(-1, false); err2 != nil {
|
||||
err = err2
|
||||
}
|
||||
} else {
|
||||
err = m.lock.Release(version, true)
|
||||
}
|
||||
}()
|
||||
var base *Manifest
|
||||
if latest {
|
||||
base, err = m.rw.Read(constant.LatestManifestVersion)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
base.version++
|
||||
} else {
|
||||
base, err = m.rw.Read(ver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
maxVersion, err := m.rw.MaxVersion()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
base.version = maxVersion + 1
|
||||
}
|
||||
|
||||
for _, op := range m.ops {
|
||||
op.commit(base)
|
||||
}
|
||||
version = base.version
|
||||
|
||||
err = m.rw.Write(base)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return base, nil
|
||||
}
|
||||
|
||||
func NewManifestCommit(lock lock.LockManager, rw ManifestReaderWriter) ManifestCommit {
|
||||
return ManifestCommit{nil, lock, rw}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package manifest
|
||||
|
||||
import (
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
)
|
||||
|
||||
type ManifestCommitOp interface {
|
||||
commit(manifest *Manifest) error
|
||||
}
|
||||
|
||||
type AddScalarFragmentOp struct {
|
||||
ScalarFragment fragment.Fragment
|
||||
}
|
||||
|
||||
func (op AddScalarFragmentOp) commit(manifest *Manifest) error {
|
||||
op.ScalarFragment.SetFragmentId(manifest.Version())
|
||||
manifest.AddScalarFragment(op.ScalarFragment)
|
||||
return nil
|
||||
}
|
||||
|
||||
type AddVectorFragmentOp struct {
|
||||
VectorFragment fragment.Fragment
|
||||
}
|
||||
|
||||
func (op AddVectorFragmentOp) commit(manifest *Manifest) error {
|
||||
op.VectorFragment.SetFragmentId(manifest.Version())
|
||||
manifest.AddVectorFragment(op.VectorFragment)
|
||||
return nil
|
||||
}
|
||||
|
||||
type AddDeleteFragmentOp struct {
|
||||
DeleteFragment fragment.Fragment
|
||||
}
|
||||
|
||||
func (op AddDeleteFragmentOp) commit(manifest *Manifest) error {
|
||||
op.DeleteFragment.SetFragmentId(manifest.Version())
|
||||
manifest.AddDeleteFragment(op.DeleteFragment)
|
||||
return nil
|
||||
}
|
||||
|
||||
type AddBlobOp struct {
|
||||
Replace bool
|
||||
Blob blob.Blob
|
||||
}
|
||||
|
||||
func (op AddBlobOp) commit(manifest *Manifest) error {
|
||||
if !op.Replace && manifest.HasBlob(op.Blob.Name) {
|
||||
return errors.ErrBlobAlreadyExist
|
||||
}
|
||||
manifest.AddBlob(op.Blob)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,243 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package manifest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
)
|
||||
|
||||
type Manifest struct {
|
||||
schema *schema.Schema
|
||||
ScalarFragments fragment.FragmentVector
|
||||
vectorFragments fragment.FragmentVector
|
||||
deleteFragments fragment.FragmentVector
|
||||
blobs []blob.Blob
|
||||
version int64
|
||||
}
|
||||
|
||||
func NewManifest(schema *schema.Schema) *Manifest {
|
||||
return &Manifest{
|
||||
schema: schema,
|
||||
}
|
||||
}
|
||||
|
||||
func Init() *Manifest {
|
||||
return &Manifest{
|
||||
schema: schema.NewSchema(arrow.NewSchema(nil, nil), schema.DefaultSchemaOptions()),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Manifest) Copy() *Manifest {
|
||||
copied := *m
|
||||
return &copied
|
||||
}
|
||||
|
||||
func (m *Manifest) GetSchema() *schema.Schema {
|
||||
return m.schema
|
||||
}
|
||||
|
||||
func (m *Manifest) AddScalarFragment(fragment fragment.Fragment) {
|
||||
m.ScalarFragments = append(m.ScalarFragments, fragment)
|
||||
}
|
||||
|
||||
func (m *Manifest) AddVectorFragment(fragment fragment.Fragment) {
|
||||
m.vectorFragments = append(m.vectorFragments, fragment)
|
||||
}
|
||||
|
||||
func (m *Manifest) AddDeleteFragment(fragment fragment.Fragment) {
|
||||
m.deleteFragments = append(m.deleteFragments, fragment)
|
||||
}
|
||||
|
||||
func (m *Manifest) GetScalarFragments() fragment.FragmentVector {
|
||||
return m.ScalarFragments
|
||||
}
|
||||
|
||||
func (m *Manifest) GetVectorFragments() fragment.FragmentVector {
|
||||
return m.vectorFragments
|
||||
}
|
||||
|
||||
func (m *Manifest) GetDeleteFragments() fragment.FragmentVector {
|
||||
return m.deleteFragments
|
||||
}
|
||||
|
||||
func (m *Manifest) Version() int64 {
|
||||
return m.version
|
||||
}
|
||||
|
||||
func (m *Manifest) SetVersion(version int64) {
|
||||
m.version = version
|
||||
}
|
||||
|
||||
func (m *Manifest) GetBlobs() []blob.Blob {
|
||||
return m.blobs
|
||||
}
|
||||
|
||||
func (m *Manifest) ToProtobuf() (*storagev2pb.Manifest, error) {
|
||||
manifest := &storagev2pb.Manifest{}
|
||||
manifest.Version = m.version
|
||||
for _, vectorFragment := range m.vectorFragments {
|
||||
manifest.VectorFragments = append(manifest.VectorFragments, vectorFragment.ToProtobuf())
|
||||
}
|
||||
for _, scalarFragment := range m.ScalarFragments {
|
||||
manifest.ScalarFragments = append(manifest.ScalarFragments, scalarFragment.ToProtobuf())
|
||||
}
|
||||
for _, deleteFragment := range m.deleteFragments {
|
||||
manifest.DeleteFragments = append(manifest.DeleteFragments, deleteFragment.ToProtobuf())
|
||||
}
|
||||
|
||||
for _, blob := range m.blobs {
|
||||
manifest.Blobs = append(manifest.Blobs, blob.ToProtobuf())
|
||||
}
|
||||
|
||||
schemaProto, err := m.schema.ToProtobuf()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
manifest.Schema = schemaProto
|
||||
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
func (m *Manifest) FromProtobuf(manifest *storagev2pb.Manifest) error {
|
||||
err := m.schema.FromProtobuf(manifest.Schema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, vectorFragment := range manifest.VectorFragments {
|
||||
m.vectorFragments = append(m.vectorFragments, fragment.FromProtobuf(vectorFragment))
|
||||
}
|
||||
|
||||
for _, scalarFragment := range manifest.ScalarFragments {
|
||||
m.ScalarFragments = append(m.ScalarFragments, fragment.FromProtobuf(scalarFragment))
|
||||
}
|
||||
|
||||
for _, deleteFragment := range manifest.DeleteFragments {
|
||||
m.deleteFragments = append(m.deleteFragments, fragment.FromProtobuf(deleteFragment))
|
||||
}
|
||||
|
||||
for _, b := range manifest.Blobs {
|
||||
m.blobs = append(m.blobs, blob.FromProtobuf(b))
|
||||
}
|
||||
|
||||
m.version = manifest.Version
|
||||
return nil
|
||||
}
|
||||
|
||||
func WriteManifestFile(manifest *Manifest, output file.File) error {
|
||||
protoManifest, err := manifest.ToProtobuf()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
bytes, err := proto.Marshal(protoManifest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write manifest file: %w", err)
|
||||
}
|
||||
write, err := output.Write(bytes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("write manifest file: %w", err)
|
||||
}
|
||||
if write != len(bytes) {
|
||||
return fmt.Errorf("failed to write whole file, expect: %v, actual: %v", len(bytes), write)
|
||||
}
|
||||
if err = output.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manifest) HasBlob(name string) bool {
|
||||
for _, b := range m.blobs {
|
||||
if b.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *Manifest) AddBlob(blob blob.Blob) {
|
||||
m.blobs = append(m.blobs, blob)
|
||||
}
|
||||
|
||||
func (m *Manifest) RemoveBlobIfExist(name string) {
|
||||
idx := -1
|
||||
for i, b := range m.blobs {
|
||||
if b.Name == name {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
m.blobs = append(m.blobs[0:idx], m.blobs[idx+1:]...)
|
||||
}
|
||||
|
||||
func (m *Manifest) GetBlob(name string) (blob.Blob, bool) {
|
||||
for _, b := range m.blobs {
|
||||
if b.Name == name {
|
||||
return b, true
|
||||
}
|
||||
}
|
||||
|
||||
return blob.Blob{}, false
|
||||
}
|
||||
|
||||
func ParseFromFile(f fs.Fs, path string) (*Manifest, error) {
|
||||
manifest := Init()
|
||||
manifestProto := &storagev2pb.Manifest{}
|
||||
|
||||
buf, err := f.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = proto.Unmarshal(buf, manifestProto)
|
||||
if err != nil {
|
||||
log.Error("Failed to unmarshal manifest proto", log.String("err", err.Error()))
|
||||
return nil, fmt.Errorf("parse from file: %w", err)
|
||||
}
|
||||
err = manifest.FromProtobuf(manifestProto)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
// TODO REMOVE BELOW CODE
|
||||
|
||||
// DataFile describes a data file by its path and column names.
type DataFile struct {
	path string
	cols []string
}

// Path returns the file path the DataFile was created with.
func (d *DataFile) Path() string {
	return d.path
}

// NewDataFile returns a DataFile for path with no columns set.
func NewDataFile(path string) *DataFile {
	df := new(DataFile)
	df.path = path
	return df
}
|
|
@ -0,0 +1,119 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package manifest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
)
|
||||
|
||||
// ErrManifestNotFound is returned by ManifestReaderWriter when no manifest
// file matching the request exists.
var ErrManifestNotFound = errors.New("manifest not found")
|
||||
|
||||
type ManifestReaderWriter struct {
|
||||
fs fs.Fs
|
||||
root string
|
||||
}
|
||||
|
||||
func findAllManifest(fs fs.Fs, path string) ([]fs.FileEntry, error) {
|
||||
files, err := fs.List(path)
|
||||
log.Debug("list all manifest:", log.Any("files", files))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return files, nil
|
||||
}
|
||||
|
||||
func (rw ManifestReaderWriter) Read(version int64) (*Manifest, error) {
|
||||
manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var maxVersionManifest string
|
||||
var maxVersion int64 = -1
|
||||
for _, m := range manifests {
|
||||
ver := utils.ParseVersionFromFileName(filepath.Base(m.Path))
|
||||
if ver == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if version != constant.LatestManifestVersion {
|
||||
if ver == version {
|
||||
return ParseFromFile(rw.fs, m.Path)
|
||||
}
|
||||
} else if ver > maxVersion {
|
||||
maxVersion = ver
|
||||
maxVersionManifest = m.Path
|
||||
}
|
||||
}
|
||||
|
||||
if maxVersion != -1 {
|
||||
return ParseFromFile(rw.fs, maxVersionManifest)
|
||||
}
|
||||
return nil, ErrManifestNotFound
|
||||
}
|
||||
|
||||
func (rw ManifestReaderWriter) MaxVersion() (int64, error) {
|
||||
manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root))
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
var max int64 = -1
|
||||
for _, m := range manifests {
|
||||
ver := utils.ParseVersionFromFileName(filepath.Base(m.Path))
|
||||
if ver == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if ver > max {
|
||||
max = ver
|
||||
}
|
||||
}
|
||||
|
||||
if max == -1 {
|
||||
return -1, ErrManifestNotFound
|
||||
}
|
||||
return max, nil
|
||||
}
|
||||
|
||||
func (rw ManifestReaderWriter) Write(m *Manifest) error {
|
||||
tmpManifestFilePath := utils.GetManifestTmpFilePath(rw.root, m.Version())
|
||||
manifestFilePath := utils.GetManifestFilePath(rw.root, m.Version())
|
||||
log.Debug("path", log.String("tmpManifestFilePath", tmpManifestFilePath), log.String("manifestFilePath", manifestFilePath))
|
||||
output, err := rw.fs.OpenFile(tmpManifestFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open file error: %w", err)
|
||||
}
|
||||
if err = WriteManifestFile(m, output); err != nil {
|
||||
return err
|
||||
}
|
||||
err = rw.fs.Rename(tmpManifestFilePath, manifestFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("rename file error: %w", err)
|
||||
}
|
||||
log.Debug("save manifest file success", log.String("path", manifestFilePath))
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewManifestReaderWriter(fs fs.Fs, root string) ManifestReaderWriter {
|
||||
return ManifestReaderWriter{fs, root}
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package options
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||
)
|
||||
|
||||
type Options struct {
|
||||
Schema *schema.Schema // optional
|
||||
Version int64 // optional
|
||||
LockManager lock.LockManager // optional, no lock manager as default
|
||||
}
|
||||
|
||||
type SpaceOptionsBuilder struct {
|
||||
options Options
|
||||
}
|
||||
|
||||
func (b *SpaceOptionsBuilder) SetSchema(schema *schema.Schema) *SpaceOptionsBuilder {
|
||||
b.options.Schema = schema
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *SpaceOptionsBuilder) SetVersion(version int64) *SpaceOptionsBuilder {
|
||||
b.options.Version = version
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *SpaceOptionsBuilder) SetLockManager(lockManager lock.LockManager) *SpaceOptionsBuilder {
|
||||
b.options.LockManager = lockManager
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *SpaceOptionsBuilder) Reset() {
|
||||
b.options = Options{LockManager: &lock.EmptyLockManager{}}
|
||||
}
|
||||
|
||||
// Build returns a copy of the options assembled so far.
func (b *SpaceOptionsBuilder) Build() Options {
	return b.options
}
|
||||
|
||||
func NewSpaceOptionBuilder() *SpaceOptionsBuilder {
|
||||
return &SpaceOptionsBuilder{
|
||||
options: Options{
|
||||
Version: constant.LatestManifestVersion,
|
||||
LockManager: &lock.EmptyLockManager{},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func DefaultOptions() *Options {
|
||||
return &Options{}
|
||||
}
|
||||
|
||||
// WriteOptions configures a write.
type WriteOptions struct {
	MaxRecordPerFile int64
}

// DefaultWriteOptions is a WriteOptions with MaxRecordPerFile set to 1024.
var DefaultWriteOptions = WriteOptions{MaxRecordPerFile: 1024}

// NewWriteOption returns a freshly allocated WriteOptions with
// MaxRecordPerFile set to 1024.
func NewWriteOption() *WriteOptions {
	opt := new(WriteOptions)
	opt.MaxRecordPerFile = 1024
	return opt
}
|
||||
|
||||
// FsType enumerates the supported file system kinds.
type FsType int8

// File system kinds, numbered from 0 in declaration order.
const (
	InMemory FsType = iota // 0
	LocalFS                // 1
	S3                     // 2
)
|
||||
|
||||
type SpaceOptions struct {
|
||||
Fs FsType
|
||||
VectorColumns []string
|
||||
}
|
||||
|
||||
// TODO: Change to FilterSet type
|
||||
type FilterSet []filter.Filter
|
||||
|
||||
// version is a package-level default set to the largest int64.
// NOTE(review): no use of this variable is visible in this chunk — confirm
// whether it is still needed.
var version = int64(math.MaxInt64)
|
||||
|
||||
type ReadOptions struct {
|
||||
// Filters map[string]filter.Filter
|
||||
Filters map[string]filter.Filter
|
||||
FiltersV2 FilterSet
|
||||
Columns []string
|
||||
ManifestVersion int64
|
||||
version int64
|
||||
}
|
||||
|
||||
func NewReadOptions() *ReadOptions {
|
||||
return &ReadOptions{
|
||||
Filters: make(map[string]filter.Filter),
|
||||
FiltersV2: make(FilterSet, 0),
|
||||
Columns: make([]string, 0),
|
||||
ManifestVersion: constant.LatestManifestVersion,
|
||||
version: math.MaxInt64,
|
||||
}
|
||||
}
|
||||
|
||||
func (o *ReadOptions) AddFilter(filter filter.Filter) {
|
||||
o.Filters[filter.GetColumnName()] = filter
|
||||
o.FiltersV2 = append(o.FiltersV2, filter)
|
||||
}
|
||||
|
||||
func (o *ReadOptions) AddColumn(column string) {
|
||||
o.Columns = append(o.Columns, column)
|
||||
}
|
||||
|
||||
func (o *ReadOptions) SetColumns(columns []string) {
|
||||
o.Columns = columns
|
||||
}
|
||||
|
||||
func (o *ReadOptions) SetVersion(version int64) {
|
||||
o.version = version
|
||||
}
|
||||
|
||||
func (o *ReadOptions) GetVersion() int64 {
|
||||
return o.version
|
||||
}
|
||||
|
||||
func (o *ReadOptions) OutputColumns() []string {
|
||||
return o.Columns
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package schema
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
)
|
||||
|
||||
// Schema is a wrapper of arrow schema
|
||||
type Schema struct {
|
||||
schema *arrow.Schema
|
||||
scalarSchema *arrow.Schema
|
||||
vectorSchema *arrow.Schema
|
||||
deleteSchema *arrow.Schema
|
||||
|
||||
options *SchemaOptions
|
||||
}
|
||||
|
||||
func (s *Schema) Schema() *arrow.Schema {
|
||||
return s.schema
|
||||
}
|
||||
|
||||
func (s *Schema) Options() *SchemaOptions {
|
||||
return s.options
|
||||
}
|
||||
|
||||
func NewSchema(schema *arrow.Schema, options *SchemaOptions) *Schema {
|
||||
return &Schema{
|
||||
schema: schema,
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Schema) Validate() error {
|
||||
err := s.options.Validate(s.schema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = s.BuildScalarSchema()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = s.BuildVectorSchema()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = s.BuildDeleteSchema()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Schema) ScalarSchema() *arrow.Schema {
|
||||
return s.scalarSchema
|
||||
}
|
||||
|
||||
func (s *Schema) VectorSchema() *arrow.Schema {
|
||||
return s.vectorSchema
|
||||
}
|
||||
|
||||
func (s *Schema) DeleteSchema() *arrow.Schema {
|
||||
return s.deleteSchema
|
||||
}
|
||||
|
||||
func (s *Schema) FromProtobuf(schema *storagev2pb.Schema) error {
|
||||
schemaType, err := utils.FromProtobufSchema(schema.ArrowSchema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s.schema = schemaType
|
||||
s.options.FromProtobuf(schema.GetSchemaOptions())
|
||||
s.BuildScalarSchema()
|
||||
s.BuildVectorSchema()
|
||||
s.BuildDeleteSchema()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Schema) ToProtobuf() (*storagev2pb.Schema, error) {
|
||||
schema := &storagev2pb.Schema{}
|
||||
arrowSchema, err := utils.ToProtobufSchema(s.schema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
schema.ArrowSchema = arrowSchema
|
||||
schema.SchemaOptions = s.options.ToProtobuf()
|
||||
return schema, nil
|
||||
}
|
||||
|
||||
func (s *Schema) BuildScalarSchema() error {
|
||||
fields := make([]arrow.Field, 0, len(s.schema.Fields()))
|
||||
for _, field := range s.schema.Fields() {
|
||||
if field.Name == s.options.VectorColumn {
|
||||
continue
|
||||
}
|
||||
fields = append(fields, field)
|
||||
}
|
||||
offsetFiled := arrow.Field{Name: constant.OffsetFieldName, Type: arrow.DataType(&arrow.Int64Type{})}
|
||||
fields = append(fields, offsetFiled)
|
||||
s.scalarSchema = arrow.NewSchema(fields, nil)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Schema) BuildVectorSchema() error {
|
||||
fields := make([]arrow.Field, 0, len(s.schema.Fields()))
|
||||
for _, field := range s.schema.Fields() {
|
||||
if field.Name == s.options.VectorColumn ||
|
||||
field.Name == s.options.PrimaryColumn ||
|
||||
field.Name == s.options.VersionColumn {
|
||||
fields = append(fields, field)
|
||||
}
|
||||
}
|
||||
s.vectorSchema = arrow.NewSchema(fields, nil)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Schema) BuildDeleteSchema() error {
|
||||
pkColumn, ok := s.schema.FieldsByName(s.options.PrimaryColumn)
|
||||
if !ok {
|
||||
return ErrPrimaryColumnNotFound
|
||||
}
|
||||
versionField, ok := s.schema.FieldsByName(s.options.VersionColumn)
|
||||
if !ok {
|
||||
return ErrVersionColumnNotFound
|
||||
}
|
||||
fields := make([]arrow.Field, 0, 2)
|
||||
fields = append(fields, pkColumn[0])
|
||||
fields = append(fields, versionField[0])
|
||||
s.deleteSchema = arrow.NewSchema(fields, nil)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package schema
|
||||
|
||||
import (
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/cockroachdb/errors"
|
||||
|
||||
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrPrimaryColumnNotFound = errors.New("primary column not found")
|
||||
ErrPrimaryColumnType = errors.New("primary column is not int64 or string")
|
||||
ErrPrimaryColumnEmpty = errors.New("primary column is empty")
|
||||
ErrVersionColumnNotFound = errors.New("version column not found")
|
||||
ErrVersionColumnType = errors.New("version column is not int64")
|
||||
ErrVectorColumnNotFound = errors.New("vector column not found")
|
||||
ErrVectorColumnType = errors.New("vector column is not fixed size binary or fixed size list")
|
||||
ErrVectorColumnEmpty = errors.New("vector column is empty")
|
||||
)
|
||||
|
||||
// SchemaOptions names the special columns of a schema. Validate requires
// PrimaryColumn and VectorColumn to be non-empty; VersionColumn may be empty.
type SchemaOptions struct {
	PrimaryColumn string
	VersionColumn string
	VectorColumn  string
}
|
||||
|
||||
func DefaultSchemaOptions() *SchemaOptions {
|
||||
return &SchemaOptions{
|
||||
PrimaryColumn: "",
|
||||
VersionColumn: "",
|
||||
VectorColumn: "",
|
||||
}
|
||||
}
|
||||
|
||||
func (o *SchemaOptions) ToProtobuf() *storagev2pb.SchemaOptions {
|
||||
options := &storagev2pb.SchemaOptions{}
|
||||
options.PrimaryColumn = o.PrimaryColumn
|
||||
options.VersionColumn = o.VersionColumn
|
||||
options.VectorColumn = o.VectorColumn
|
||||
return options
|
||||
}
|
||||
|
||||
func (o *SchemaOptions) FromProtobuf(options *storagev2pb.SchemaOptions) {
|
||||
o.PrimaryColumn = options.PrimaryColumn
|
||||
o.VersionColumn = options.VersionColumn
|
||||
o.VectorColumn = options.VectorColumn
|
||||
}
|
||||
|
||||
func (o *SchemaOptions) Validate(schema *arrow.Schema) error {
|
||||
if o.PrimaryColumn != "" {
|
||||
primaryField, ok := schema.FieldsByName(o.PrimaryColumn)
|
||||
if !ok {
|
||||
return ErrPrimaryColumnNotFound
|
||||
} else if primaryField[0].Type.ID() != arrow.STRING && primaryField[0].Type.ID() != arrow.INT64 {
|
||||
return ErrPrimaryColumnType
|
||||
}
|
||||
} else {
|
||||
return ErrPrimaryColumnEmpty
|
||||
}
|
||||
if o.VersionColumn != "" {
|
||||
versionField, ok := schema.FieldsByName(o.VersionColumn)
|
||||
if !ok {
|
||||
return ErrVersionColumnNotFound
|
||||
} else if versionField[0].Type.ID() != arrow.INT64 {
|
||||
return ErrVersionColumnType
|
||||
}
|
||||
}
|
||||
if o.VectorColumn != "" {
|
||||
vectorField, b := schema.FieldsByName(o.VectorColumn)
|
||||
if !b {
|
||||
return ErrVectorColumnNotFound
|
||||
} else if vectorField[0].Type.ID() != arrow.FIXED_SIZE_BINARY && vectorField[0].Type.ID() != arrow.FIXED_SIZE_LIST {
|
||||
return ErrVectorColumnType
|
||||
}
|
||||
} else {
|
||||
return ErrVectorColumnEmpty
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *SchemaOptions) HasVersionColumn() bool {
|
||||
return o.VersionColumn != ""
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package schema
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// Test Schema.Schema
|
||||
func TestBuildSchema(t *testing.T) {
|
||||
pkField := arrow.Field{
|
||||
Name: "pk_field",
|
||||
Type: arrow.DataType(&arrow.Int64Type{}),
|
||||
Nullable: false,
|
||||
}
|
||||
vsField := arrow.Field{
|
||||
Name: "vs_field",
|
||||
Type: arrow.DataType(&arrow.Int64Type{}),
|
||||
Nullable: false,
|
||||
}
|
||||
vecField := arrow.Field{
|
||||
Name: "vec_field",
|
||||
Type: arrow.DataType(&arrow.FixedSizeBinaryType{ByteWidth: 16}),
|
||||
Nullable: false,
|
||||
}
|
||||
fields := []arrow.Field{pkField, vsField, vecField}
|
||||
|
||||
as := arrow.NewSchema(fields, nil)
|
||||
schemaOptions := &SchemaOptions{
|
||||
PrimaryColumn: "pk_field",
|
||||
VersionColumn: "vs_field",
|
||||
VectorColumn: "vec_field",
|
||||
}
|
||||
|
||||
sc := NewSchema(as, schemaOptions)
|
||||
err := sc.Validate()
|
||||
assert.NoError(t, err)
|
||||
}
|
|
@ -0,0 +1,220 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/reader/recordreader"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/transaction"
|
||||
)
|
||||
|
||||
type Space struct {
|
||||
path string
|
||||
fs fs.Fs
|
||||
deleteFragments fragment.DeleteFragmentVector
|
||||
manifest *manifest.Manifest
|
||||
lockManager lock.LockManager
|
||||
}
|
||||
|
||||
func (s *Space) init() error {
|
||||
for _, f := range s.manifest.GetDeleteFragments() {
|
||||
deleteFragment := fragment.Make(s.fs, s.manifest.GetSchema(), f)
|
||||
s.deleteFragments = append(s.deleteFragments, deleteFragment)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewSpace(f fs.Fs, path string, m *manifest.Manifest, lockManager lock.LockManager) *Space {
|
||||
deleteFragments := fragment.DeleteFragmentVector{}
|
||||
return &Space{
|
||||
fs: f,
|
||||
path: path,
|
||||
manifest: m,
|
||||
deleteFragments: deleteFragments,
|
||||
lockManager: lockManager,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Space) NewTransaction() transaction.Transaction {
|
||||
return transaction.NewConcurrentWriteTransaction(s)
|
||||
}
|
||||
|
||||
func (s *Space) Write(reader array.RecordReader, options *options.WriteOptions) error {
|
||||
return transaction.NewConcurrentWriteTransaction(s).Write(reader, options).Commit()
|
||||
}
|
||||
|
||||
func (s *Space) Delete(reader array.RecordReader) error {
|
||||
return transaction.NewConcurrentWriteTransaction(s).Delete(reader).Commit()
|
||||
}
|
||||
|
||||
// Open opened a space or create if the space does not exist.
|
||||
// If space does not exist. schema should not be nullptr, or an error will be returned.
|
||||
// If space exists and version is specified, it will restore to the state at this version,
|
||||
// or it will choose the latest version.
|
||||
func Open(uri string, opt options.Options) (*Space, error) {
|
||||
var f fs.Fs
|
||||
var m *manifest.Manifest
|
||||
var path string
|
||||
f, err := fs.BuildFileSystem(uri)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
path = f.Path()
|
||||
log.Debug("open space", log.String("path", path))
|
||||
|
||||
log.Debug(utils.GetManifestDir(path))
|
||||
// create if not exist
|
||||
if err = f.CreateDir(utils.GetManifestDir(path)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = f.CreateDir(utils.GetScalarDataDir(path)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = f.CreateDir(utils.GetVectorDataDir(path)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = f.CreateDir(utils.GetBlobDir(path)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = f.CreateDir(utils.GetDeleteDataDir(path)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rw := manifest.NewManifestReaderWriter(f, path)
|
||||
m, err = rw.Read(opt.Version)
|
||||
if err != nil {
|
||||
// create the first manifest file
|
||||
if err == manifest.ErrManifestNotFound {
|
||||
if opt.Schema == nil {
|
||||
log.Error("schema is nil")
|
||||
return nil, errors.ErrSchemaIsNil
|
||||
}
|
||||
if err = opt.Schema.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m = manifest.NewManifest(opt.Schema)
|
||||
m.SetVersion(0) // TODO: check if this is necessary
|
||||
if err = rw.Write(m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
space := NewSpace(f, path, m, opt.LockManager)
|
||||
return space, nil
|
||||
}
|
||||
|
||||
func (s *Space) readManifest(version int64) error {
|
||||
rw := manifest.NewManifestReaderWriter(s.fs, s.path)
|
||||
manifest, err := rw.Read(version)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.manifest = manifest
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Space) Read(readOptions *options.ReadOptions) (array.RecordReader, error) {
|
||||
if s.manifest == nil || readOptions.ManifestVersion != s.manifest.Version() {
|
||||
if err := s.readManifest(readOptions.ManifestVersion); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if s.manifest.GetSchema().Options().HasVersionColumn() {
|
||||
f := filter.NewConstantFilter(filter.LessThanOrEqual, s.manifest.GetSchema().Options().VersionColumn, int64(math.MaxInt64))
|
||||
readOptions.AddFilter(f)
|
||||
readOptions.AddColumn(s.manifest.GetSchema().Options().VersionColumn)
|
||||
}
|
||||
log.Debug("read", log.Any("readOption", readOptions))
|
||||
|
||||
return recordreader.MakeRecordReader(s.manifest, s.manifest.GetSchema(), s.fs, s.deleteFragments, readOptions), nil
|
||||
}
|
||||
|
||||
func (s *Space) WriteBlob(content []byte, name string, replace bool) error {
|
||||
return transaction.NewConcurrentWriteTransaction(s).WriteBlob(content, name, replace).Commit()
|
||||
}
|
||||
|
||||
func (s *Space) ReadBlob(name string, output []byte) (int, error) {
|
||||
blob, ok := s.manifest.GetBlob(name)
|
||||
if !ok {
|
||||
return -1, errors.ErrBlobNotExist
|
||||
}
|
||||
|
||||
f, err := s.fs.OpenFile(blob.File)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return f.Read(output)
|
||||
}
|
||||
|
||||
func (s *Space) GetBlobByteSize(name string) (int64, error) {
|
||||
blob, ok := s.manifest.GetBlob(name)
|
||||
if !ok {
|
||||
return -1, errors.ErrBlobNotExist
|
||||
}
|
||||
return blob.Size, nil
|
||||
}
|
||||
|
||||
func (s *Space) GetCurrentVersion() int64 {
|
||||
return s.manifest.Version()
|
||||
}
|
||||
|
||||
func (s *Space) ScanDelete() (array.RecordReader, error) {
|
||||
return recordreader.MakeScanDeleteReader(s.manifest, s.fs), nil
|
||||
}
|
||||
|
||||
func (s *Space) Path() string {
|
||||
return s.path
|
||||
}
|
||||
|
||||
func (s *Space) Fs() fs.Fs {
|
||||
return s.fs
|
||||
}
|
||||
|
||||
func (s *Space) Manifest() *manifest.Manifest {
|
||||
return s.manifest
|
||||
}
|
||||
|
||||
func (s *Space) SetManifest(manifest *manifest.Manifest) {
|
||||
s.manifest = manifest
|
||||
}
|
||||
|
||||
func (s *Space) LockManager() lock.LockManager {
|
||||
return s.lockManager
|
||||
}
|
||||
|
||||
func (s *Space) SetLockManager(lockManager lock.LockManager) {
|
||||
s.lockManager = lockManager
|
||||
}
|
||||
|
||||
func (s *Space) StatisticsBlobs() []blob.Blob {
|
||||
return s.manifest.GetBlobs()
|
||||
}
|
|
@ -0,0 +1,327 @@
|
|||
// Copyright 2023 Zilliz
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package transaction
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/apache/arrow/go/v12/arrow"
|
||||
"github.com/apache/arrow/go/v12/arrow/array"
|
||||
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/format/parquet"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||
)
|
||||
|
||||
type SpaceMeta interface {
|
||||
Path() string
|
||||
Fs() fs.Fs
|
||||
Manifest() *manifest.Manifest
|
||||
LockManager() lock.LockManager
|
||||
SetManifest(manifest *manifest.Manifest)
|
||||
}
|
||||
|
||||
type Transaction interface {
|
||||
Write(reader array.RecordReader, options *options.WriteOptions) Transaction
|
||||
Delete(reader array.RecordReader) Transaction
|
||||
WriteBlob(content []byte, name string, replace bool) Transaction
|
||||
Commit() error
|
||||
}
|
||||
|
||||
type ConcurrentWriteTransaction struct {
|
||||
operations []Operation
|
||||
commit manifest.ManifestCommit
|
||||
space SpaceMeta
|
||||
}
|
||||
|
||||
func (t *ConcurrentWriteTransaction) Write(reader array.RecordReader, options *options.WriteOptions) Transaction {
|
||||
operation := &WriteOperation{
|
||||
reader: reader,
|
||||
options: options,
|
||||
space: t.space,
|
||||
transaction: t,
|
||||
}
|
||||
t.operations = append(t.operations, operation)
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *ConcurrentWriteTransaction) Delete(reader array.RecordReader) Transaction {
|
||||
operation := &DeleteOperation{
|
||||
reader: reader,
|
||||
space: t.space,
|
||||
transaction: t,
|
||||
}
|
||||
t.operations = append(t.operations, operation)
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *ConcurrentWriteTransaction) WriteBlob(content []byte, name string, replace bool) Transaction {
|
||||
operation := &WriteBlobOperation{
|
||||
content: content,
|
||||
name: name,
|
||||
replace: replace,
|
||||
space: t.space,
|
||||
transaction: t,
|
||||
}
|
||||
t.operations = append(t.operations, operation)
|
||||
return t
|
||||
}
|
||||
|
||||
func (t *ConcurrentWriteTransaction) Commit() error {
|
||||
for _, op := range t.operations {
|
||||
op.Execute()
|
||||
}
|
||||
nxtManifest, err := t.commit.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.space.SetManifest(nxtManifest)
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewConcurrentWriteTransaction(space SpaceMeta) *ConcurrentWriteTransaction {
|
||||
return &ConcurrentWriteTransaction{
|
||||
operations: make([]Operation, 0),
|
||||
commit: manifest.NewManifestCommit(space.LockManager(), manifest.NewManifestReaderWriter(space.Fs(), space.Path())),
|
||||
space: space,
|
||||
}
|
||||
}
|
||||
|
||||
// Operation is one unit of work queued on a transaction.
type Operation interface {
	// Execute performs the operation and reports any failure.
	Execute() error
}
|
||||
|
||||
type WriteOperation struct {
|
||||
reader array.RecordReader
|
||||
options *options.WriteOptions
|
||||
space SpaceMeta
|
||||
transaction *ConcurrentWriteTransaction
|
||||
}
|
||||
|
||||
func (w *WriteOperation) Execute() error {
|
||||
if !w.space.Manifest().GetSchema().Schema().Equal(w.reader.Schema()) {
|
||||
return errors.ErrSchemaNotMatch
|
||||
}
|
||||
|
||||
scalarSchema, vectorSchema := w.space.Manifest().GetSchema().ScalarSchema(), w.space.Manifest().GetSchema().VectorSchema()
|
||||
var (
|
||||
scalarWriter format.Writer
|
||||
vectorWriter format.Writer
|
||||
)
|
||||
scalarFragment := fragment.NewFragment()
|
||||
vectorFragment := fragment.NewFragment()
|
||||
|
||||
isEmpty := true
|
||||
for w.reader.Next() {
|
||||
rec := w.reader.Record()
|
||||
|
||||
if rec.NumRows() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
var err error
|
||||
scalarWriter, err = w.write(scalarSchema, rec, scalarWriter, &scalarFragment, w.options, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
vectorWriter, err = w.write(vectorSchema, rec, vectorWriter, &vectorFragment, w.options, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
isEmpty = false
|
||||
}
|
||||
|
||||
if scalarWriter != nil {
|
||||
if err := scalarWriter.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if vectorWriter != nil {
|
||||
if err := vectorWriter.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if isEmpty {
|
||||
return nil
|
||||
}
|
||||
|
||||
op1 := manifest.AddScalarFragmentOp{ScalarFragment: scalarFragment}
|
||||
op2 := manifest.AddVectorFragmentOp{VectorFragment: vectorFragment}
|
||||
w.transaction.commit.AddOp(op1, op2)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *WriteOperation) write(
|
||||
schema *arrow.Schema,
|
||||
rec arrow.Record,
|
||||
writer format.Writer,
|
||||
fragment *fragment.Fragment,
|
||||
opt *options.WriteOptions,
|
||||
isScalar bool,
|
||||
) (format.Writer, error) {
|
||||
var columns []arrow.Array
|
||||
cols := rec.Columns()
|
||||
for k := range cols {
|
||||
_, has := schema.FieldsByName(rec.ColumnName(k))
|
||||
if has {
|
||||
columns = append(columns, cols[k])
|
||||
}
|
||||
}
|
||||
|
||||
var rootPath string
|
||||
if isScalar {
|
||||
// add offset column for scalar
|
||||
offsetValues := make([]int64, rec.NumRows())
|
||||
for i := 0; i < int(rec.NumRows()); i++ {
|
||||
offsetValues[i] = int64(i)
|
||||
}
|
||||
builder := array.NewInt64Builder(memory.DefaultAllocator)
|
||||
builder.AppendValues(offsetValues, nil)
|
||||
offsetColumn := builder.NewArray()
|
||||
columns = append(columns, offsetColumn)
|
||||
rootPath = utils.GetScalarDataDir(w.space.Path())
|
||||
} else {
|
||||
rootPath = utils.GetVectorDataDir(w.space.Path())
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
record := array.NewRecord(schema, columns, rec.NumRows())
|
||||
|
||||
if writer == nil {
|
||||
filePath := utils.GetNewParquetFilePath(rootPath)
|
||||
writer, err = parquet.NewFileWriter(schema, w.space.Fs(), filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fragment.AddFile(filePath)
|
||||
}
|
||||
|
||||
err = writer.Write(record)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if writer.Count() >= opt.MaxRecordPerFile {
|
||||
log.Debug("close writer", log.Any("count", writer.Count()))
|
||||
err = writer.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
writer = nil
|
||||
}
|
||||
|
||||
return writer, nil
|
||||
}
|
||||
|
||||
type DeleteOperation struct {
|
||||
reader array.RecordReader
|
||||
space SpaceMeta
|
||||
transaction *ConcurrentWriteTransaction
|
||||
}
|
||||
|
||||
func (o *DeleteOperation) Execute() error {
|
||||
schema := o.space.Manifest().GetSchema().DeleteSchema()
|
||||
fragment := fragment.NewFragment()
|
||||
var (
|
||||
err error
|
||||
writer format.Writer
|
||||
deleteFile string
|
||||
)
|
||||
|
||||
for o.reader.Next() {
|
||||
rec := o.reader.Record()
|
||||
if rec.NumRows() == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if writer == nil {
|
||||
deleteFile = utils.GetNewParquetFilePath(utils.GetDeleteDataDir(o.space.Path()))
|
||||
writer, err = parquet.NewFileWriter(schema, o.space.Fs(), deleteFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fragment.AddFile(deleteFile)
|
||||
}
|
||||
|
||||
if err = writer.Write(rec); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if writer != nil {
|
||||
if err = writer.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
op := manifest.AddDeleteFragmentOp{DeleteFragment: fragment}
|
||||
o.transaction.commit.AddOp(op)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type WriteBlobOperation struct {
|
||||
content []byte
|
||||
name string
|
||||
replace bool
|
||||
space SpaceMeta
|
||||
transaction *ConcurrentWriteTransaction
|
||||
}
|
||||
|
||||
func (o *WriteBlobOperation) Execute() error {
|
||||
if !o.replace && o.space.Manifest().HasBlob(o.name) {
|
||||
return errors.ErrBlobAlreadyExist
|
||||
}
|
||||
|
||||
blobFile := utils.GetBlobFilePath(o.space.Path())
|
||||
f, err := o.space.Fs().OpenFile(blobFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
n, err := f.Write(o.content)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if n != len(o.content) {
|
||||
return fmt.Errorf("blob not written completely, written %d but expect %d", n, len(o.content))
|
||||
}
|
||||
|
||||
if err = f.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
op := manifest.AddBlobOp{
|
||||
Replace: o.replace,
|
||||
Blob: blob.Blob{
|
||||
Name: o.name,
|
||||
Size: int64(len(o.content)),
|
||||
File: blobFile,
|
||||
},
|
||||
}
|
||||
o.transaction.commit.AddOp(op)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package milvus.proto.storagev2;
|
||||
|
||||
option go_package = "github.com/milvus-io/milvus/pkg/proto/storagev2pb";
|
||||
|
||||
// LogicType identifies the logical type carried by a DataType.
// Entries that are commented out are not defined; their numbers are left
// unassigned.
// NOTE(review): the numbering looks like it follows Apache Arrow's type ids
// -- confirm before relying on it.
enum LogicType {
  NA = 0;
  BOOL = 1;
  UINT8 = 2;
  INT8 = 3;
  UINT16 = 4;
  INT16 = 5;
  UINT32 = 6;
  INT32 = 7;
  UINT64 = 8;
  INT64 = 9;
  HALF_FLOAT = 10;
  FLOAT = 11;
  DOUBLE = 12;
  STRING = 13;
  BINARY = 14;
  FIXED_SIZE_BINARY = 15;
  // DATE32 = 16;
  // DATE64 = 17;
  // TIMESTAMP = 18;
  // TIME32 = 19;
  // TIME64 = 20;
  // INTERVAL_MONTHS = 21;
  // INTERVAL_DAY_TIME = 22;
  // DECIMAL128 = 23;
  // option allow_alias = true;
  // DECIMAL = 23; // DECIMAL==DECIMAL128
  // DECIMAL256 = 24;
  LIST = 25;
  STRUCT = 26;
  // SPARSE_UNION = 27;
  // DENSE_UNION = 28;
  DICTIONARY = 29;
  MAP = 30;
  // EXTENSION = 31;
  FIXED_SIZE_LIST = 32;
  // DURATION = 33;
  // LARGE_STRING = 34;
  // LARGE_BINARY = 35;
  // LARGE_LIST = 36;
  // INTERVAL_MONTH_DAY_NANO = 37;
  // RUN_END_ENCODED = 38;
  MAX_ID = 39;
}
|
||||
|
||||
// Endianness is the byte order recorded on an ArrowSchema.
enum Endianness {
  Little = 0;
  Big = 1;
}
|
||||
|
||||
// FixedSizeBinaryType holds the extra parameter of a fixed-size binary type.
message FixedSizeBinaryType {
  // Width of each value, in bytes.
  int32 byte_width = 1;
}
|
||||
|
||||
// FixedSizeListType holds the extra parameter of a fixed-size list type.
message FixedSizeListType {
  // Number of elements in each list value.
  int32 list_size = 1;
}
|
||||
|
||||
// DictionaryType holds the extra parameters of a dictionary type.
message DictionaryType {
  // Type of the dictionary indices.
  DataType index_type = 1;
  // Type of the dictionary values.
  DataType value_type = 2;
  // Whether the dictionary is ordered.
  bool ordered = 3;
}
|
||||
|
||||
// MapType holds the extra parameter of a map type.
message MapType {
  // Whether the map keys are sorted.
  bool keys_sorted = 1;
}
|
||||
|
||||
// DataType describes the type of a Field: a logic type, optional parameters
// specific to that type, and child fields.
message DataType {
  // Parameters that only some logic types carry; at most one is set.
  oneof type_related_values {
    FixedSizeBinaryType fixed_size_binary_type = 1;
    FixedSizeListType fixed_size_list_type = 2;
    DictionaryType dictionary_type = 3;
    MapType map_type = 4;
  }
  // The logical type itself.
  LogicType logic_type = 100;
  // Child fields, for types that nest other fields.
  repeated Field children = 101;
}
|
||||
|
||||
// KeyValueMetadata stores metadata as two string lists.
// NOTE(review): keys and values are presumably parallel lists paired by
// index -- confirm against the code that reads them.
message KeyValueMetadata {
  repeated string keys = 1;
  repeated string values = 2;
}
|
||||
|
||||
// Field describes one named field of a schema or of a nested DataType.
message Field {
  // Field name.
  string name = 1;
  // Whether the field may hold nulls.
  bool nullable = 2;
  // Type of the field.
  DataType data_type = 3;
  // Metadata attached to the field.
  KeyValueMetadata metadata = 4;
}
|
||||
|
||||
// SchemaOptions names the columns of a Schema that have a special role.
message SchemaOptions {
  // Name of the primary column.
  string primary_column = 1;
  // Name of the version column.
  string version_column = 2;
  // Name of the vector column.
  string vector_column = 3;
}
|
||||
|
||||
// ArrowSchema is the serialized form of a schema: its fields, byte order and
// schema-level metadata.
message ArrowSchema {
  // Fields of the schema, in order.
  repeated Field fields = 1;
  // Byte order of the schema.
  Endianness endianness = 2;
  // Metadata attached to the schema.
  KeyValueMetadata metadata = 3;
}
|
||||
|
||||
// Schema pairs an ArrowSchema with the options naming its special columns.
message Schema {
  ArrowSchema arrow_schema = 1;
  SchemaOptions schema_options = 2;
}
|
||||
|
||||
|
||||
// Options holds the settings stored in a Manifest.
message Options {
  // URI recorded for the manifest's space.
  string uri = 1;
}
|
||||
|
||||
// Manifest is the serialized description of a storage space at one version:
// its options, schema, data fragments and blobs.
message Manifest {
  // Version number of this manifest.
  int64 version = 1;
  // Options stored with the manifest.
  Options options = 2;
  // Schema of the stored data.
  Schema schema = 3;
  // Fragments holding scalar data files.
  repeated Fragment scalar_fragments = 4;
  // Fragments holding vector data files.
  repeated Fragment vector_fragments = 5;
  // Fragments holding delete data files.
  repeated Fragment delete_fragments = 6;
  // Blobs registered in the space.
  repeated Blob blobs = 7;
}
|
||||
|
||||
// Fragment groups a set of files under one id.
message Fragment {
  // Identifier of the fragment.
  int64 id = 1;
  // Files that belong to the fragment.
  repeated string files = 2;
}
|
||||
|
||||
// Blob records one named blob stored in the space.
message Blob {
  // Name of the blob.
  string name = 1;
  // Size of the blob content.
  int64 size = 2;
  // File that holds the blob content.
  string file = 3;
}
|
File diff suppressed because it is too large
Load Diff
|
@ -101,6 +101,7 @@ USE_ASAN="OFF"
|
|||
USE_DYNAMIC_SIMD="ON"
|
||||
USE_OPENDAL="OFF"
|
||||
INDEX_ENGINE="KNOWHERE"
|
||||
ENABLE_AZURE_FS="OFF"
|
||||
: "${ENABLE_GCP_NATIVE:="OFF"}"
|
||||
|
||||
while getopts "p:d:t:s:f:n:i:y:a:x:o:ulrcghzmebZ" arg; do
|
||||
|
@ -257,7 +258,8 @@ ${CMAKE_EXTRA_ARGS} \
|
|||
-DCPU_ARCH=${CPU_ARCH} \
|
||||
-DUSE_OPENDAL=${USE_OPENDAL} \
|
||||
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} "
|
||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \
|
||||
-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} "
|
||||
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
||||
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
||||
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
||||
|
|
|
@ -62,10 +62,12 @@ mkdir -p ./planpb
|
|||
mkdir -p ./workerpb
|
||||
mkdir -p ./messagespb
|
||||
mkdir -p ./streamingpb
|
||||
mkdir -p ./storagev2pb
|
||||
mkdir -p $ROOT_DIR/cmd/tools/migration/legacy/legacypb
|
||||
|
||||
protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=."
|
||||
|
||||
${protoc_opt} --go_out=paths=source_relative:./storagev2pb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./storagev2pb storagev2.proto || { echo 'generate storagev2.proto failed'; exit 1; }
|
||||
${protoc_opt} --go_out=paths=source_relative:./etcdpb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; }
|
||||
${protoc_opt} --go_out=paths=source_relative:./indexcgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; }
|
||||
${protoc_opt} --go_out=paths=source_relative:./cgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; }
|
||||
|
|
Loading…
Reference in New Issue