Add read result rate protection (#19728)

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
pull/19756/head
bigsheeper 2022-10-13 14:57:24 +08:00 committed by GitHub
parent ab88dd77e7
commit f1e330a997
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 152 additions and 45 deletions

View File

@ -373,6 +373,7 @@ common:
# 1. DML throughput limitation;
# 2. DDL, DQL qps/rps limitation;
# 3. DQL Queue length/latency protection;
# 4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.
quotaAndLimits:
enabled: false # `true` to enable quota and limits, `false` to disable.
@ -383,38 +384,38 @@ quotaAndLimits:
ddl: # ddl limit rates, default no limit.
enabled: false
collectionRate: # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
partitionRate: # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
partitionRate: -1 # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
indexRate:
enabled: false
max: # qps, default no limit, rate for CreateIndex, DropIndex
max: -1 # qps, default no limit, rate for CreateIndex, DropIndex
flushRate:
enabled: false
max: # qps, default no limit, rate for flush
max: -1 # qps, default no limit, rate for flush
compactionRate:
enabled: false
max: # qps, default no limit, rate for manualCompaction
max: -1 # qps, default no limit, rate for manualCompaction
# dml limit rates, default no limit.
# The maximum rate will not be greater than `max`.
dml:
enabled: false
insertRate:
max: # MB/s, default no limit
max: -1 # MB/s, default no limit
deleteRate:
max: # MB/s, default no limit
max: -1 # MB/s, default no limit
bulkLoadRate: # not support yet. TODO: limit bulkLoad rate
max: # MB/s, default no limit
max: -1 # MB/s, default no limit
# dql limit rates, default no limit.
# The maximum rate will not be greater than `max`.
dql:
enabled: false
searchRate:
max: # vps (vectors per second), default no limit
max: -1 # vps (vectors per second), default no limit
queryRate:
max: # qps, default no limit
max: -1 # qps, default no limit
# limitWriting decides whether dml requests are allowed.
limitWriting:
@ -440,29 +441,33 @@ quotaAndLimits:
diskProtection:
# When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
enabled: true
diskQuota: -1 # GB, (0, +inf), -1 means use default +inf
diskQuota: -1 # GB, (0, +inf), default no limit
# limitReading decides whether dql requests are allowed.
limitReading:
# forceDeny `false` means dql requests are allowed (except for some
# specific conditions, such as collection has been dropped), `true` means always reject all dql requests.
forceDeny: false
queueProtection:
enabled: false
# nqInQueueThreshold indicated that the system was under backpressure for Search/Query path.
# If NQ in any QueryNode's queue is greater than nqInQueueThreshold, search&query rates would gradually cool off
# until the NQ in queue no longer exceeds nqInQueueThreshold. We think of the NQ of query request as 1.
nqInQueueThreshold: # int, default no limit
nqInQueueThreshold: -1 # int, default no limit
# queueLatencyThreshold indicated that the system was under backpressure for Search/Query path.
# If dql latency of queuing is greater than queueLatencyThreshold, search&query rates would gradually cool off
# until the latency of queuing no longer exceeds queueLatencyThreshold.
# The latency here refers to the averaged latency over a period of time.
queueLatencyThreshold: # milliseconds, default no limit
# coolOffSpeed is the speed of search&query rates cool off.
coolOffSpeed: 0.9 # (0, 1]
queueLatencyThreshold: -1 # milliseconds, default no limit
resultProtection:
enabled: false
# maxReadResultRate indicated that the system was under backpressure for Search/Query path.
# If dql result rate is greater than maxReadResultRate, search&query rates would gradually cool off
# until the read result rate no longer exceeds maxReadResultRate.
maxReadResultRate: -1 # MB/s, default no limit
# coolOffSpeed is the speed of search&query rates cool off.
coolOffSpeed: 0.9 # (0, 1]
# AutoIndexConfig
autoIndex:

View File

@ -2850,6 +2850,7 @@ func (node *Proxy) Search(ctx context.Context, request *milvuspb.SearchRequest)
if qt.result != nil {
sentSize := proto.Size(qt.result)
metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10)).Add(float64(sentSize))
rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize))
}
return qt.result, nil
}
@ -3068,6 +3069,7 @@ func (node *Proxy) Query(ctx context.Context, request *milvuspb.QueryRequest) (*
FieldsData: qt.result.FieldsData,
}
sentSize := proto.Size(qt.result)
rateCol.Add(metricsinfo.ReadResultThroughput, float64(sentSize))
metrics.ProxyReadReqSendBytes.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10)).Add(float64(sentSize))
return ret, nil
}

View File

@ -35,21 +35,22 @@ type showConfigurationsFuncType func(ctx context.Context, request *internalpb.Sh
func getQuotaMetrics() (*metricsinfo.ProxyQuotaMetrics, error) {
var err error
rms := make([]metricsinfo.RateMetric, 0)
getRateMetric := func(rateType internalpb.RateType) {
rate, err2 := rateCol.Rate(rateType.String(), ratelimitutil.DefaultAvgDuration)
getRateMetric := func(label string) {
rate, err2 := rateCol.Rate(label, ratelimitutil.DefaultAvgDuration)
if err2 != nil {
err = err2
return
}
rms = append(rms, metricsinfo.RateMetric{
Label: rateType.String(),
Label: label,
Rate: rate,
})
}
getRateMetric(internalpb.RateType_DMLInsert)
getRateMetric(internalpb.RateType_DMLDelete)
getRateMetric(internalpb.RateType_DQLSearch)
getRateMetric(internalpb.RateType_DQLQuery)
getRateMetric(internalpb.RateType_DMLInsert.String())
getRateMetric(internalpb.RateType_DMLDelete.String())
getRateMetric(internalpb.RateType_DQLSearch.String())
getRateMetric(internalpb.RateType_DQLQuery.String())
getRateMetric(metricsinfo.ReadResultThroughput)
if err != nil {
return nil, err
}

View File

@ -169,6 +169,7 @@ func (node *Proxy) initRateCollector() error {
// TODO: add bulkLoad rate
rateCol.Register(internalpb.RateType_DQLSearch.String())
rateCol.Register(internalpb.RateType_DQLQuery.String())
rateCol.Register(metricsinfo.ReadResultThroughput)
return nil
}

View File

@ -77,6 +77,7 @@ type Limit = ratelimitutil.Limit
// 3. Disk quota protection -> force deny writing if exceeded
// 4. DQL Queue length protection -> dqlRate = curDQLRate * CoolOffSpeed
// 5. DQL queue latency protection -> dqlRate = curDQLRate * CoolOffSpeed
// 6. Search result protection -> searchRate = curSearchRate * CoolOffSpeed
// If necessary, user can also manually force to deny RW requests.
type QuotaCenter struct {
// clients
@ -285,9 +286,6 @@ func (q *QuotaCenter) calculateReadRates() {
q.forceDenyReading(ManualForceDeny)
return
}
if !Params.QuotaConfig.QueueProtectionEnabled {
return
}
coolOffSpeed := Params.QuotaConfig.CoolOffSpeed
coolOff := func(realTimeSearchRate float64, realTimeQueryRate float64) {
@ -316,6 +314,13 @@ func (q *QuotaCenter) calculateReadRates() {
log.Debug("QuotaCenter checkNQInQuery done", zap.Float64("queueLengthFactor", queueLengthFactor))
if Limit(queueLengthFactor) == Limit(coolOffSpeed) {
coolOff(realTimeSearchRate, realTimeQueryRate)
return
}
resultRateFactor := q.checkReadResultRate()
log.Debug("QuotaCenter checkReadResultRate done", zap.Float64("resultRateFactor", resultRateFactor))
if Limit(resultRateFactor) == Limit(coolOffSpeed) {
coolOff(realTimeSearchRate, realTimeQueryRate)
}
}
@ -443,15 +448,18 @@ func (q *QuotaCenter) timeTickDelay() (float64, error) {
// checkNQInQuery checks search&query nq in QueryNode,
// and return the factor according to NQInQueueThreshold.
func (q *QuotaCenter) checkNQInQuery() float64 {
if !Params.QuotaConfig.QueueProtectionEnabled {
return 1
}
sum := func(ri metricsinfo.ReadInfoInQueue) int64 {
return ri.UnsolvedQueue + ri.ReadyQueue + ri.ReceiveChan + ri.ExecuteChan
}
factor := float64(1)
nqInQueueThreshold := Params.QuotaConfig.NQInQueueThreshold
if nqInQueueThreshold < 0 {
// < 0 means disable queue length protection
return factor
return 1
}
for _, metric := range q.queryNodeMetrics {
searchNQSum := sum(metric.SearchQueue)
@ -461,17 +469,20 @@ func (q *QuotaCenter) checkNQInQuery() float64 {
return Params.QuotaConfig.CoolOffSpeed
}
}
return factor
return 1
}
// checkQueryLatency checks queueing latency in QueryNode for search&query requests,
// and return the factor according to QueueLatencyThreshold.
func (q *QuotaCenter) checkQueryLatency() float64 {
factor := float64(1)
if !Params.QuotaConfig.QueueProtectionEnabled {
return 1
}
queueLatencyThreshold := Params.QuotaConfig.QueueLatencyThreshold
if queueLatencyThreshold < 0 {
// < 0 means disable queue latency protection
return factor
return 1
}
for _, metric := range q.queryNodeMetrics {
searchLatency := metric.SearchQueue.AvgQueueDuration
@ -480,7 +491,29 @@ func (q *QuotaCenter) checkQueryLatency() float64 {
return Params.QuotaConfig.CoolOffSpeed
}
}
return factor
return 1
}
// checkReadResultRate checks search result rate in Proxy,
// and return the factor according to MaxReadResultRate.
func (q *QuotaCenter) checkReadResultRate() float64 {
if !Params.QuotaConfig.ResultProtectionEnabled {
return 1
}
maxRate := Params.QuotaConfig.MaxReadResultRate
rateCount := float64(0)
for _, metric := range q.proxyMetrics {
for _, rm := range metric.Rms {
if rm.Label == metricsinfo.ReadResultThroughput {
rateCount += rm.Rate
}
}
}
if rateCount >= maxRate {
return Params.QuotaConfig.CoolOffSpeed
}
return 1
}
// memoryToWaterLevel checks whether any node has memory resource issue,

View File

@ -244,8 +244,33 @@ func TestQuotaCenter(t *testing.T) {
}}
factor = quotaCenter.checkQueryLatency()
assert.Equal(t, 1.0, factor)
//ok := math.Abs(factor-1.0) < 0.0001
//assert.True(t, ok)
})
t.Run("test checkReadResult", func(t *testing.T) {
quotaCenter := NewQuotaCenter(pcm, &queryCoordMockForQuota{}, &dataCoordMockForQuota{}, core.tsoAllocator)
factor := quotaCenter.checkReadResultRate()
assert.Equal(t, float64(1), factor)
// test cool off
Params.QuotaConfig.ResultProtectionEnabled = true
Params.QuotaConfig.MaxReadResultRate = 1
quotaCenter.proxyMetrics = []*metricsinfo.ProxyQuotaMetrics{{
Rms: []metricsinfo.RateMetric{
{Label: metricsinfo.ReadResultThroughput, Rate: 1.2},
},
}}
factor = quotaCenter.checkReadResultRate()
assert.Equal(t, Params.QuotaConfig.CoolOffSpeed, factor)
// test no cool off
quotaCenter.proxyMetrics = []*metricsinfo.ProxyQuotaMetrics{{
Rms: []metricsinfo.RateMetric{
{Label: metricsinfo.ReadResultThroughput, Rate: 0.8},
},
}}
factor = quotaCenter.checkReadResultRate()
assert.Equal(t, 1.0, factor)
})
t.Run("test calculateReadRates", func(t *testing.T) {
@ -278,6 +303,20 @@ func TestQuotaCenter(t *testing.T) {
quotaCenter.calculateReadRates()
assert.Equal(t, Limit(100.0*0.9), quotaCenter.currentRates[internalpb.RateType_DQLSearch])
assert.Equal(t, Limit(100.0*0.9), quotaCenter.currentRates[internalpb.RateType_DQLQuery])
Params.QuotaConfig.ResultProtectionEnabled = true
Params.QuotaConfig.MaxReadResultRate = 1
quotaCenter.proxyMetrics = []*metricsinfo.ProxyQuotaMetrics{{
Rms: []metricsinfo.RateMetric{
{Label: internalpb.RateType_DQLSearch.String(), Rate: 100},
{Label: internalpb.RateType_DQLQuery.String(), Rate: 100},
{Label: metricsinfo.ReadResultThroughput, Rate: 1.2},
},
}}
quotaCenter.queryNodeMetrics = []*metricsinfo.QueryNodeQuotaMetrics{{SearchQueue: metricsinfo.ReadInfoInQueue{}}}
quotaCenter.calculateReadRates()
assert.Equal(t, Limit(100.0*0.9), quotaCenter.currentRates[internalpb.RateType_DQLSearch])
assert.Equal(t, Limit(100.0*0.9), quotaCenter.currentRates[internalpb.RateType_DQLQuery])
})
t.Run("test calculateWriteRates", func(t *testing.T) {

View File

@ -28,6 +28,7 @@ type RateMetricLabel = string
const (
NQPerSecond RateMetricLabel = "NQPerSecond"
SearchThroughput RateMetricLabel = "SearchThroughput"
ReadResultThroughput RateMetricLabel = "ReadResultThroughput"
InsertConsumeThroughput RateMetricLabel = "InsertConsumeThroughput"
DeleteConsumeThroughput RateMetricLabel = "DeleteConsumeThroughput"
)

View File

@ -29,10 +29,10 @@ import (
const (
// defaultMax is the default unlimited rate or threshold.
defaultMax = float64(math.MaxFloat64)
// GB used to convert gigabytes and bytes.
GB = 1024.0 * 1024.0 * 1024.0
// GBSize used to convert gigabytes and bytes.
GBSize = 1024.0 * 1024.0 * 1024.0
// defaultDiskQuotaInGB is the default disk quota in gigabytes.
defaultDiskQuotaInGB = defaultMax / GB
defaultDiskQuotaInGB = defaultMax / GBSize
// defaultMin is the default minimal rate.
defaultMin = float64(0)
// defaultLowWaterLevel is the default memory low water level.
@ -93,11 +93,13 @@ type quotaConfig struct {
DiskQuota float64
// limit reading
ForceDenyReading bool
QueueProtectionEnabled bool
NQInQueueThreshold int64
QueueLatencyThreshold float64
CoolOffSpeed float64
ForceDenyReading bool
QueueProtectionEnabled bool
NQInQueueThreshold int64
QueueLatencyThreshold float64
ResultProtectionEnabled bool
MaxReadResultRate float64
CoolOffSpeed float64
}
func (p *quotaConfig) init(base *BaseTable) {
@ -154,6 +156,8 @@ func (p *quotaConfig) init(base *BaseTable) {
p.initQueueProtectionEnabled()
p.initNQInQueueThreshold()
p.initQueueLatencyThreshold()
p.initResultProtectionEnabled()
p.initMaxReadResultRate()
p.initCoolOffSpeed()
}
@ -520,7 +524,7 @@ func (p *quotaConfig) initDiskQuota() {
p.DiskQuota = defaultDiskQuotaInGB
}
// gigabytes to bytes
p.DiskQuota = p.DiskQuota * GB
p.DiskQuota = p.DiskQuota * GBSize
}
func (p *quotaConfig) initForceDenyReading() {
@ -553,13 +557,32 @@ func (p *quotaConfig) initQueueLatencyThreshold() {
}
}
func (p *quotaConfig) initResultProtectionEnabled() {
p.ResultProtectionEnabled = p.Base.ParseBool("quotaAndLimits.limitReading.resultProtection.enabled", false)
}
func (p *quotaConfig) initMaxReadResultRate() {
if !p.ResultProtectionEnabled {
p.MaxReadResultRate = defaultMax
return
}
p.MaxReadResultRate = p.Base.ParseFloatWithDefault("quotaAndLimits.limitReading.resultProtection.maxReadResultRate", defaultMax)
if math.Abs(p.MaxReadResultRate-defaultMax) > 0.001 { // maxRate != defaultMax
p.MaxReadResultRate = megaBytesRate2Bytes(p.MaxReadResultRate)
}
// [0, inf)
if p.MaxReadResultRate < 0 {
p.MaxReadResultRate = defaultMax
}
}
func (p *quotaConfig) initCoolOffSpeed() {
const defaultSpeed = 0.9
p.CoolOffSpeed = defaultSpeed
if !p.QueueProtectionEnabled {
return
}
p.CoolOffSpeed = p.Base.ParseFloatWithDefault("quotaAndLimits.limitReading.queueProtection.coolOffSpeed", defaultSpeed)
p.CoolOffSpeed = p.Base.ParseFloatWithDefault("quotaAndLimits.limitReading.coolOffSpeed", defaultSpeed)
// (0, 1]
if p.CoolOffSpeed <= 0 || p.CoolOffSpeed > 1 {
log.Warn("CoolOffSpeed must in the range of `(0, 1]`, use default value", zap.Float64("speed", p.CoolOffSpeed), zap.Float64("default", defaultSpeed))

View File

@ -86,6 +86,8 @@ func TestQuotaParam(t *testing.T) {
assert.Equal(t, false, qc.QueueProtectionEnabled)
assert.Equal(t, int64(0), qc.NQInQueueThreshold)
assert.Equal(t, float64(0), qc.QueueLatencyThreshold)
assert.Equal(t, false, qc.ResultProtectionEnabled)
assert.Equal(t, defaultMax, qc.MaxReadResultRate)
assert.Equal(t, 0.9, qc.CoolOffSpeed)
})
}