2021-10-09 10:03:34 +00:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
2021-06-18 07:20:08 +00:00
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
2021-10-09 10:03:34 +00:00
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
2021-06-18 07:20:08 +00:00
|
|
|
//
|
2021-10-09 10:03:34 +00:00
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2021-06-18 07:20:08 +00:00
|
|
|
|
|
|
|
package roles
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2022-11-14 06:23:07 +00:00
|
|
|
"fmt"
|
2021-06-18 07:20:08 +00:00
|
|
|
"os"
|
|
|
|
"os/signal"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"syscall"
|
2022-12-07 10:01:19 +00:00
|
|
|
"time"
|
2021-06-18 07:20:08 +00:00
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/tracer"
|
|
|
|
|
2022-10-14 06:19:24 +00:00
|
|
|
"go.uber.org/zap"
|
2022-10-13 02:37:23 +00:00
|
|
|
|
2021-06-18 07:20:08 +00:00
|
|
|
"github.com/milvus-io/milvus/cmd/components"
|
|
|
|
"github.com/milvus-io/milvus/internal/log"
|
2023-01-06 06:21:37 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/management"
|
2022-10-14 06:19:24 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/management/healthz"
|
2021-06-18 07:20:08 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/metrics"
|
2023-01-06 06:21:37 +00:00
|
|
|
rocksmqimpl "github.com/milvus-io/milvus/internal/mq/mqimpl/rocksmq/server"
|
2022-10-10 07:55:22 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/dependency"
|
2021-12-29 06:35:21 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/etcd"
|
2022-11-08 06:33:03 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/logutil"
|
2021-12-14 10:31:25 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/metricsinfo"
|
2021-12-29 06:35:21 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/paramtable"
|
2023-01-06 08:53:35 +00:00
|
|
|
_ "github.com/milvus-io/milvus/internal/util/symbolizer" // support symbolizer and crash dump
|
2021-12-28 15:12:46 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
2022-10-10 07:55:22 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2021-06-18 07:20:08 +00:00
|
|
|
)
|
|
|
|
|
2022-03-18 07:51:22 +00:00
|
|
|
// all milvus related metrics is in a separate registry
|
|
|
|
var Registry *prometheus.Registry
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
Registry = prometheus.NewRegistry()
|
|
|
|
Registry.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))
|
|
|
|
Registry.MustRegister(prometheus.NewGoCollector())
|
2022-09-02 09:46:59 +00:00
|
|
|
metrics.RegisterEtcdMetrics(Registry)
|
2022-03-18 07:51:22 +00:00
|
|
|
}
|
|
|
|
|
2021-12-02 02:27:32 +00:00
|
|
|
func stopRocksmq() {
|
2022-03-03 13:57:56 +00:00
|
|
|
rocksmqimpl.CloseRocksMQ()
|
2021-12-02 02:27:32 +00:00
|
|
|
}
|
|
|
|
|
2022-10-14 06:19:24 +00:00
|
|
|
type component interface {
|
|
|
|
healthz.Indicator
|
|
|
|
Run() error
|
|
|
|
Stop() error
|
|
|
|
}
|
|
|
|
|
|
|
|
func runComponent[T component](ctx context.Context,
|
|
|
|
localMsg bool,
|
2023-01-12 08:09:39 +00:00
|
|
|
runWg *sync.WaitGroup,
|
2022-10-14 06:19:24 +00:00
|
|
|
creator func(context.Context, dependency.Factory) (T, error),
|
|
|
|
metricRegister func(*prometheus.Registry)) T {
|
|
|
|
var role T
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
factory := dependency.NewFactory(localMsg)
|
|
|
|
var err error
|
|
|
|
role, err = creator(ctx, factory)
|
|
|
|
if localMsg {
|
2022-11-08 06:33:03 +00:00
|
|
|
paramtable.SetRole(typeutil.StandaloneRole)
|
2022-10-14 06:19:24 +00:00
|
|
|
} else {
|
2022-11-08 06:33:03 +00:00
|
|
|
paramtable.SetRole(role.GetName())
|
2022-10-14 06:19:24 +00:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
wg.Done()
|
|
|
|
_ = role.Run()
|
2023-01-12 08:09:39 +00:00
|
|
|
runWg.Done()
|
2022-10-14 06:19:24 +00:00
|
|
|
}()
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
healthz.Register(role)
|
|
|
|
metricRegister(Registry)
|
|
|
|
return role
|
|
|
|
}
|
|
|
|
|
2022-02-24 06:41:52 +00:00
|
|
|
// MilvusRoles decides which components are brought up with Milvus. Each flag
// enables one component in Run; the `env` tag names the environment variable
// associated with the flag.
type MilvusRoles struct {
	// Coordinator and proxy/worker flags, in the order Run starts them.
	EnableRootCoord  bool `env:"ENABLE_ROOT_COORD"`
	EnableProxy      bool `env:"ENABLE_PROXY"`
	EnableQueryCoord bool `env:"ENABLE_QUERY_COORD"`
	EnableQueryNode  bool `env:"ENABLE_QUERY_NODE"`
	EnableDataCoord  bool `env:"ENABLE_DATA_COORD"`
	EnableDataNode   bool `env:"ENABLE_DATA_NODE"`
	EnableIndexCoord bool `env:"ENABLE_INDEX_COORD"`
	EnableIndexNode  bool `env:"ENABLE_INDEX_NODE"`
}
|
|
|
|
|
2021-11-01 14:35:18 +00:00
|
|
|
// EnvValue not used now.
|
2021-06-18 07:20:08 +00:00
|
|
|
func (mr *MilvusRoles) EnvValue(env string) bool {
|
|
|
|
env = strings.ToLower(env)
|
|
|
|
env = strings.Trim(env, " ")
|
|
|
|
return env == "1" || env == "true"
|
|
|
|
}
|
|
|
|
|
2022-06-10 13:58:08 +00:00
|
|
|
func (mr *MilvusRoles) printLDPreLoad() {
|
|
|
|
const LDPreLoad = "LD_PRELOAD"
|
|
|
|
val, ok := os.LookupEnv(LDPreLoad)
|
|
|
|
if ok {
|
|
|
|
log.Info("Enable Jemalloc", zap.String("Jemalloc Path", val))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runRootCoord(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.RootCoord {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewRootCoord, metrics.RegisterRootCoord)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runProxy(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.Proxy {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewProxy, metrics.RegisterProxy)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runQueryCoord(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.QueryCoord {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewQueryCoord, metrics.RegisterQueryCoord)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runQueryNode(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.QueryNode {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewQueryNode, metrics.RegisterQueryNode)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runDataCoord(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.DataCoord {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewDataCoord, metrics.RegisterDataCoord)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runDataNode(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.DataNode {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewDataNode, metrics.RegisterDataNode)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runIndexCoord(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.IndexCoord {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewIndexCoord, func(registry *prometheus.Registry) {})
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
func (mr *MilvusRoles) runIndexNode(ctx context.Context, localMsg bool, wg *sync.WaitGroup) *components.IndexNode {
|
|
|
|
wg.Add(1)
|
|
|
|
return runComponent(ctx, localMsg, wg, components.NewIndexNode, metrics.RegisterIndexNode)
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2022-11-08 06:33:03 +00:00
|
|
|
func (mr *MilvusRoles) setupLogger() {
|
2023-02-23 03:37:46 +00:00
|
|
|
params := paramtable.Get()
|
|
|
|
logConfig := log.Config{
|
|
|
|
Level: params.LogCfg.Level.GetValue(),
|
|
|
|
GrpcLevel: params.LogCfg.GrpcLogLevel.GetValue(),
|
|
|
|
Format: params.LogCfg.Format.GetValue(),
|
|
|
|
Stdout: params.LogCfg.Stdout.GetAsBool(),
|
|
|
|
File: log.FileLogConfig{
|
|
|
|
RootPath: params.LogCfg.RootPath.GetValue(),
|
|
|
|
MaxSize: params.LogCfg.MaxSize.GetAsInt(),
|
|
|
|
MaxDays: params.LogCfg.MaxAge.GetAsInt(),
|
|
|
|
MaxBackups: params.LogCfg.MaxBackups.GetAsInt(),
|
|
|
|
},
|
|
|
|
}
|
2022-11-08 06:33:03 +00:00
|
|
|
id := paramtable.GetNodeID()
|
|
|
|
roleName := paramtable.GetRole()
|
|
|
|
rootPath := logConfig.File.RootPath
|
|
|
|
if rootPath != "" {
|
2022-11-14 06:23:07 +00:00
|
|
|
logConfig.File.Filename = fmt.Sprintf("%s-%d.log", roleName, id)
|
2022-11-08 06:33:03 +00:00
|
|
|
} else {
|
|
|
|
logConfig.File.Filename = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
logutil.SetupLogger(&logConfig)
|
|
|
|
}
|
|
|
|
|
2021-12-13 01:29:50 +00:00
|
|
|
// Run Milvus components.
|
2021-12-29 06:35:21 +00:00
|
|
|
func (mr *MilvusRoles) Run(local bool, alias string) {
|
2022-04-14 12:01:34 +00:00
|
|
|
log.Info("starting running Milvus components")
|
2021-06-18 07:20:08 +00:00
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
2022-12-06 14:59:19 +00:00
|
|
|
defer func() {
|
|
|
|
// some deferred Stop has race with context cancel
|
|
|
|
cancel()
|
|
|
|
}()
|
2022-06-10 13:58:08 +00:00
|
|
|
mr.printLDPreLoad()
|
2021-06-18 07:20:08 +00:00
|
|
|
|
|
|
|
// only standalone enable localMsg
|
2021-12-29 06:35:21 +00:00
|
|
|
if local {
|
2021-12-14 10:31:25 +00:00
|
|
|
if err := os.Setenv(metricsinfo.DeployModeEnvKey, metricsinfo.StandaloneDeployMode); err != nil {
|
|
|
|
log.Error("Failed to set deploy mode: ", zap.Error(err))
|
|
|
|
}
|
2022-12-16 07:59:23 +00:00
|
|
|
|
2022-11-04 06:25:38 +00:00
|
|
|
paramtable.Init()
|
|
|
|
params := paramtable.Get()
|
2021-12-02 02:27:32 +00:00
|
|
|
|
2022-11-04 06:25:38 +00:00
|
|
|
if params.RocksmqEnable() {
|
|
|
|
path, err := params.Load("rocksmq.path")
|
2022-04-12 11:47:33 +00:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2022-08-02 13:26:33 +00:00
|
|
|
|
|
|
|
if err = rocksmqimpl.InitRocksMQ(path); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2022-04-12 11:47:33 +00:00
|
|
|
defer stopRocksmq()
|
2021-12-02 02:27:32 +00:00
|
|
|
}
|
2021-12-29 06:35:21 +00:00
|
|
|
|
2022-11-17 10:59:09 +00:00
|
|
|
if params.EtcdCfg.UseEmbedEtcd.GetAsBool() {
|
2022-02-24 06:41:52 +00:00
|
|
|
// Start etcd server.
|
2022-11-30 10:23:15 +00:00
|
|
|
etcd.InitEtcdServer(
|
|
|
|
params.EtcdCfg.UseEmbedEtcd.GetAsBool(),
|
|
|
|
params.EtcdCfg.ConfigPath.GetValue(),
|
|
|
|
params.EtcdCfg.DataDir.GetValue(),
|
|
|
|
params.EtcdCfg.EtcdLogPath.GetValue(),
|
|
|
|
params.EtcdCfg.EtcdLogLevel.GetValue())
|
2021-12-29 06:35:21 +00:00
|
|
|
defer etcd.StopEtcdServer()
|
|
|
|
}
|
2023-01-12 08:09:39 +00:00
|
|
|
paramtable.SetRole(typeutil.StandaloneRole)
|
2021-09-03 11:35:47 +00:00
|
|
|
} else {
|
2021-12-14 10:31:25 +00:00
|
|
|
if err := os.Setenv(metricsinfo.DeployModeEnvKey, metricsinfo.ClusterDeployMode); err != nil {
|
2021-10-28 10:02:30 +00:00
|
|
|
log.Error("Failed to set deploy mode: ", zap.Error(err))
|
2021-09-03 11:35:47 +00:00
|
|
|
}
|
2022-11-04 06:25:38 +00:00
|
|
|
paramtable.Init()
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|
|
|
|
|
2023-02-02 04:03:58 +00:00
|
|
|
management.ServeHTTP()
|
|
|
|
|
2021-06-18 07:20:08 +00:00
|
|
|
var rc *components.RootCoord
|
2023-01-12 08:09:39 +00:00
|
|
|
var wg sync.WaitGroup
|
2021-06-18 07:20:08 +00:00
|
|
|
if mr.EnableRootCoord {
|
2023-01-12 08:09:39 +00:00
|
|
|
rc = mr.runRootCoord(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if rc != nil {
|
|
|
|
defer rc.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var pn *components.Proxy
|
|
|
|
if mr.EnableProxy {
|
2023-01-12 08:09:39 +00:00
|
|
|
pn = mr.runProxy(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if pn != nil {
|
|
|
|
defer pn.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var qs *components.QueryCoord
|
|
|
|
if mr.EnableQueryCoord {
|
2023-01-12 08:09:39 +00:00
|
|
|
qs = mr.runQueryCoord(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if qs != nil {
|
|
|
|
defer qs.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var qn *components.QueryNode
|
|
|
|
if mr.EnableQueryNode {
|
2023-01-12 08:09:39 +00:00
|
|
|
qn = mr.runQueryNode(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if qn != nil {
|
|
|
|
defer qn.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var ds *components.DataCoord
|
|
|
|
if mr.EnableDataCoord {
|
2023-01-12 08:09:39 +00:00
|
|
|
ds = mr.runDataCoord(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if ds != nil {
|
|
|
|
defer ds.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var dn *components.DataNode
|
|
|
|
if mr.EnableDataNode {
|
2023-01-12 08:09:39 +00:00
|
|
|
dn = mr.runDataNode(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if dn != nil {
|
|
|
|
defer dn.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var is *components.IndexCoord
|
|
|
|
if mr.EnableIndexCoord {
|
2023-01-12 08:09:39 +00:00
|
|
|
is = mr.runIndexCoord(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if is != nil {
|
|
|
|
defer is.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var in *components.IndexNode
|
|
|
|
if mr.EnableIndexNode {
|
2023-01-12 08:09:39 +00:00
|
|
|
in = mr.runIndexNode(ctx, local, &wg)
|
2021-06-18 07:20:08 +00:00
|
|
|
if in != nil {
|
|
|
|
defer in.Stop()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
wg.Wait()
|
2022-11-14 06:23:07 +00:00
|
|
|
|
2023-01-12 08:09:39 +00:00
|
|
|
mr.setupLogger()
|
|
|
|
tracer.Init()
|
2022-10-13 02:37:23 +00:00
|
|
|
metrics.Register(Registry)
|
2022-12-07 10:01:19 +00:00
|
|
|
|
|
|
|
paramtable.SetCreateTime(time.Now())
|
|
|
|
paramtable.SetUpdateTime(time.Now())
|
|
|
|
|
2021-06-18 07:20:08 +00:00
|
|
|
sc := make(chan os.Signal, 1)
|
|
|
|
signal.Notify(sc,
|
|
|
|
syscall.SIGHUP,
|
|
|
|
syscall.SIGINT,
|
|
|
|
syscall.SIGTERM,
|
|
|
|
syscall.SIGQUIT)
|
|
|
|
sig := <-sc
|
2021-09-22 11:48:07 +00:00
|
|
|
log.Error("Get signal to exit\n", zap.String("signal", sig.String()))
|
2021-06-18 07:20:08 +00:00
|
|
|
}
|