2021-12-29 14:49:59 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
2021-04-19 07:15:33 +00:00
// with the License. You may obtain a copy of the License at
//
2021-12-29 14:49:59 +00:00
// http://www.apache.org/licenses/LICENSE-2.0
2021-04-19 07:15:33 +00:00
//
2021-12-29 14:49:59 +00:00
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-04-19 07:15:33 +00:00
2021-01-26 01:38:40 +00:00
// Copyright 2016 TiKV Project Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
2021-02-24 09:12:06 +00:00
package tso
2021-01-26 01:38:40 +00:00
import (
2023-03-31 07:22:23 +00:00
"context"
2021-01-26 01:38:40 +00:00
"sync/atomic"
"time"
"unsafe"
2023-04-06 11:14:32 +00:00
"github.com/cockroachdb/errors"
2021-01-26 01:38:40 +00:00
"go.uber.org/zap"
2024-06-25 13:18:15 +00:00
"github.com/milvus-io/milvus/pkg/kv"
2023-04-06 11:14:32 +00:00
"github.com/milvus-io/milvus/pkg/log"
2023-09-21 01:45:27 +00:00
"github.com/milvus-io/milvus/pkg/util/tsoutil"
2023-04-06 11:14:32 +00:00
"github.com/milvus-io/milvus/pkg/util/typeutil"
2021-01-26 01:38:40 +00:00
)
const (
	// UpdateTimestampStep is the step used to update the timestamp.
	// UpdateTimestamp warns about clock offset once the lag behind the
	// system clock exceeds three steps.
	UpdateTimestampStep = 50 * time.Millisecond

	// updateTimestampGuard is the minimum interval kept between timestamps.
	updateTimestampGuard = time.Millisecond

	// maxLogical is the upper limit of the logical part of a TSO.
	// When the logical time approaches this limit, the physical time
	// is forced to increase.
	maxLogical int64 = 1 << 18
)
// atomicObject is used to store the current TSO in memory.
//
// logical is accessed with the 64-bit functions of sync/atomic, so it is kept
// as the first field: only the first word of an allocated struct is guaranteed
// to be 64-bit aligned on 32-bit platforms (ARM, 386, 32-bit MIPS). Always
// construct this type with keyed fields.
type atomicObject struct {
	// logical is the logical part of the TSO.
	logical int64
	// physical is the physical (wall clock) part of the TSO.
	physical time.Time
}
// timestampOracle is used to maintain the logic of tso.
type timestampOracle struct {
2021-04-12 10:09:28 +00:00
key string
txnKV kv . TxnKV
2021-01-26 01:38:40 +00:00
// TODO: remove saveInterval
saveInterval time . Duration
maxResetTSGap func ( ) time . Duration
// For tso, set after the PD becomes a leader.
TSO unsafe . Pointer
lastSavedTime atomic . Value
}
2021-09-25 03:43:56 +00:00
func ( t * timestampOracle ) loadTimestamp ( ) ( time . Time , error ) {
strData , err := t . txnKV . Load ( t . key )
if err != nil {
// intend to return nil
return typeutil . ZeroTime , nil
}
2023-09-21 01:45:27 +00:00
binData := [ ] byte ( strData )
2021-09-25 03:43:56 +00:00
if len ( binData ) == 0 {
return typeutil . ZeroTime , nil
}
return typeutil . ParseTimestamp ( binData )
}
2021-01-26 01:38:40 +00:00
// save timestamp, if lastTs is 0, we think the timestamp doesn't exist, so create it,
// otherwise, update it.
func ( t * timestampOracle ) saveTimestamp ( ts time . Time ) error {
2023-09-21 01:45:27 +00:00
// we use big endian here for compatibility issues
2021-11-05 11:10:59 +00:00
data := typeutil . Uint64ToBytesBigEndian ( uint64 ( ts . UnixNano ( ) ) )
2021-04-12 10:09:28 +00:00
err := t . txnKV . Save ( t . key , string ( data ) )
2021-01-26 01:38:40 +00:00
if err != nil {
return errors . WithStack ( err )
}
t . lastSavedTime . Store ( ts )
return nil
}
func ( t * timestampOracle ) InitTimestamp ( ) error {
2021-09-25 03:43:56 +00:00
last , err := t . loadTimestamp ( )
if err != nil {
return err
}
2021-01-26 01:38:40 +00:00
next := time . Now ( )
2021-10-22 09:37:12 +00:00
// If the current system time minus the saved etcd timestamp is less than `updateTimestampGuard`,
// the timestamp allocation will start from the saved etcd timestamp temporarily.
2021-09-25 03:43:56 +00:00
if typeutil . SubTimeByWallClock ( next , last ) < updateTimestampGuard {
next = last . Add ( updateTimestampGuard )
}
2021-01-26 01:38:40 +00:00
save := next . Add ( t . saveInterval )
if err := t . saveTimestamp ( save ) ; err != nil {
return err
}
2022-03-02 06:31:55 +00:00
log . Info ( "sync and save timestamp" , zap . Time ( "last" , last ) , zap . Time ( "save" , save ) , zap . Time ( "next" , next ) )
2021-01-26 01:38:40 +00:00
current := & atomicObject {
physical : next ,
}
2021-09-18 06:45:50 +00:00
// atomic unsafe pointer
/* #nosec G103 */
2021-01-26 01:38:40 +00:00
atomic . StorePointer ( & t . TSO , unsafe . Pointer ( current ) )
return nil
}
// ResetUserTimestamp update the physical part with specified tso.
func ( t * timestampOracle ) ResetUserTimestamp ( tso uint64 ) error {
physical , _ := tsoutil . ParseTS ( tso )
next := physical . Add ( time . Millisecond )
prev := ( * atomicObject ) ( atomic . LoadPointer ( & t . TSO ) )
// do not update
if typeutil . SubTimeByWallClock ( next , prev . physical ) <= 3 * updateTimestampGuard {
return errors . New ( "the specified ts too small than now" )
}
if typeutil . SubTimeByWallClock ( next , prev . physical ) >= t . maxResetTSGap ( ) {
return errors . New ( "the specified ts too large than now" )
}
save := next . Add ( t . saveInterval )
if err := t . saveTimestamp ( save ) ; err != nil {
return err
}
update := & atomicObject {
physical : next ,
}
2021-09-18 06:45:50 +00:00
// atomic unsafe pointer
/* #nosec G103 */
2021-01-26 01:38:40 +00:00
atomic . CompareAndSwapPointer ( & t . TSO , unsafe . Pointer ( prev ) , unsafe . Pointer ( update ) )
return nil
}
// UpdateTimestamp is used to update the timestamp.
// This function will do two things:
2023-02-26 03:31:49 +00:00
// 1. When the logical time is going to be used up, increase the current physical time.
// 2. When the time window is not big enough, which means the saved etcd time minus the next physical time
// will be less than or equal to `updateTimestampGuard`, then the time window needs to be updated and
// we also need to save the next physical time plus `TsoSaveInterval` into etcd.
2021-01-26 01:38:40 +00:00
//
// Here is some constraints that this function must satisfy:
// 1. The saved time is monotonically increasing.
// 2. The physical time is monotonically increasing.
// 3. The physical time is always less than the saved timestamp.
func ( t * timestampOracle ) UpdateTimestamp ( ) error {
prev := ( * atomicObject ) ( atomic . LoadPointer ( & t . TSO ) )
now := time . Now ( )
jetLag := typeutil . SubTimeByWallClock ( now , prev . physical )
if jetLag > 3 * UpdateTimestampStep {
2023-03-31 07:22:23 +00:00
log . Ctx ( context . TODO ( ) ) . WithRateGroup ( "tso" , 1 , 60 ) . RatedWarn ( 60.0 , "clock offset is huge, check network latency and clock skew" , zap . Duration ( "jet-lag" , jetLag ) ,
2022-03-02 06:31:55 +00:00
zap . Time ( "prev-physical" , prev . physical ) , zap . Time ( "now" , now ) )
2021-01-26 01:38:40 +00:00
}
var next time . Time
prevLogical := atomic . LoadInt64 ( & prev . logical )
// If the system time is greater, it will be synchronized with the system time.
if jetLag > updateTimestampGuard {
next = now
} else if prevLogical > maxLogical / 2 {
// The reason choosing maxLogical/2 here is that it's big enough for common cases.
// Because there is enough timestamp can be allocated before next update.
2022-03-02 06:31:55 +00:00
log . Warn ( "the logical time may be not enough" , zap . Int64 ( "prev-logical" , prevLogical ) )
2021-01-26 01:38:40 +00:00
next = prev . physical . Add ( time . Millisecond )
} else {
// It will still use the previous physical time to alloc the timestamp.
return nil
}
// It is not safe to increase the physical time to `next`.
// The time window needs to be updated and saved to etcd.
if typeutil . SubTimeByWallClock ( t . lastSavedTime . Load ( ) . ( time . Time ) , next ) <= updateTimestampGuard {
save := next . Add ( t . saveInterval )
if err := t . saveTimestamp ( save ) ; err != nil {
return err
}
}
current := & atomicObject {
physical : next ,
logical : 0 ,
}
2021-09-18 06:45:50 +00:00
// atomic unsafe pointer
/* #nosec G103 */
2021-01-26 01:38:40 +00:00
atomic . StorePointer ( & t . TSO , unsafe . Pointer ( current ) )
return nil
}
// ResetTimestamp is used to reset the timestamp.
func ( t * timestampOracle ) ResetTimestamp ( ) {
zero := & atomicObject {
physical : time . Now ( ) ,
}
2021-09-18 06:45:50 +00:00
// atomic unsafe pointer
/* #nosec G103 */
2021-01-26 01:38:40 +00:00
atomic . StorePointer ( & t . TSO , unsafe . Pointer ( zero ) )
}