// Source: tendermint/p2p/trust/trustmetric.go

// Copyright 2017 Tendermint. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
package trust
import (
"encoding/json"
"math"
"sync"
"time"
cmn "github.com/tendermint/tmlibs/common"
dbm "github.com/tendermint/tmlibs/db"
)
// TrustMetricStore - Manages all trust metrics for peers.
// It keeps one TrustMetric per peer key in memory, loads previously saved
// history from the DB in OnStart and writes it back in OnStop.
type TrustMetricStore struct {
// Embedded base service; OnStart and OnStop call into its hooks
cmn.BaseService
// Maps a Peer.Key to that peer's TrustMetric
peerMetrics map[string]*TrustMetric
// Mutex that protects the map and history data file
mtx sync.Mutex
// The db where peer trust metric history data will be stored
db dbm.DB
// This configuration will be used when creating new TrustMetrics
config TrustMetricConfig
}
// NewTrustMetricStore builds a store that persists peer trust history to db
// and hands tmc to every trust metric it creates.
// The returned store starts with an empty peer map and is registered as a
// base service under the name "TrustMetricStore".
func NewTrustMetricStore(db dbm.DB, tmc TrustMetricConfig) *TrustMetricStore {
	store := new(TrustMetricStore)
	store.peerMetrics = map[string]*TrustMetric{}
	store.db = db
	store.config = tmc
	store.BaseService = *cmn.NewBaseService(nil, "TrustMetricStore", store)
	return store
}
// OnStart implements Service.
// It runs the embedded BaseService start hook and then, while holding the
// store mutex, loads any previously saved peer trust history from the DB.
// Returns the error reported by the BaseService hook, if any.
func (tms *TrustMetricStore) OnStart() error {
	if err := tms.BaseService.OnStart(); err != nil {
		return err
	}

	tms.mtx.Lock()
	defer tms.mtx.Unlock()

	// A false result only means nothing has been stored yet, which is not an error
	tms.loadFromDB()
	return nil
}
// OnStop implements Service.
// While holding the store mutex it sends a stop signal to every peer metric,
// saves the history of all peers to the DB and finally runs the embedded
// BaseService stop hook.
func (tms *TrustMetricStore) OnStop() {
	tms.mtx.Lock()
	defer tms.mtx.Unlock()

	// Tell each metric's goroutine to stop tracking
	for peerKey := range tms.peerMetrics {
		tms.peerMetrics[peerKey].Stop()
	}

	tms.saveToDB()
	tms.BaseService.OnStop()
}
// Size reports how many peer trust metrics the store currently holds.
// The store mutex is held while the map is read.
func (tms *TrustMetricStore) Size() int {
	tms.mtx.Lock()
	defer tms.mtx.Unlock()

	return len(tms.peerMetrics)
}
// GetPeerTrustMetric looks up the trust metric stored for the peer key.
// When the peer has no metric yet, a new one is created from the store
// configuration, registered under key and returned.
func (tms *TrustMetricStore) GetPeerTrustMetric(key string) *TrustMetric {
	tms.mtx.Lock()
	defer tms.mtx.Unlock()

	if existing, found := tms.peerMetrics[key]; found {
		return existing
	}

	// Unknown peer: create its metric and remember it in the map
	created := NewMetricWithConfig(tms.config)
	if created == nil {
		return nil
	}
	tms.peerMetrics[key] = created
	return created
}
// PeerDisconnected pauses the trust metric of the peer identified by key.
// Peers without a stored metric are ignored.
func (tms *TrustMetricStore) PeerDisconnected(key string) {
	tms.mtx.Lock()
	defer tms.mtx.Unlock()

	metric, known := tms.peerMetrics[key]
	if !known {
		return
	}
	metric.Pause()
}
/* Private methods */
// size reports the number of stored peer metrics.
// It does not take the mutex; callers are expected to hold it already.
func (tms *TrustMetricStore) size() int {
	count := len(tms.peerMetrics)
	return count
}
/* Loading & Saving */
/* Both of these methods assume the mutex has been acquired, since they write to the map */
// trustMetricKey is the single DB key under which loadFromDB reads and
// saveToDB writes the JSON-encoded history of all peers
var trustMetricKey = []byte("trustMetricStore")
// peerHistoryJSON is the per-peer record persisted in the store DB.
// The DB value is a JSON object mapping each peer key to one of these records.
type peerHistoryJSON struct {
// Number of time intervals the peer's metric had tracked when it was saved
NumIntervals int `json:"intervals"`
// The metric's history values as they were held in memory at save time
History []float64 `json:"history"`
}
// loadFromDB restores the persisted trust history of every stored peer from
// the store DB, creating one TrustMetric per peer in tms.peerMetrics.
// It returns false when the DB holds no trust metric data, true otherwise.
// The caller must hold tms.mtx, since the peer map is written.
// Calls cmn.PanicCrisis if the stored data cannot be decoded.
func (tms *TrustMetricStore) loadFromDB() bool {
	// Obtain the history data we have so far
	bytes := tms.db.Get(trustMetricKey)
	if bytes == nil {
		return false
	}

	peers := make(map[string]peerHistoryJSON)
	if err := json.Unmarshal(bytes, &peers); err != nil {
		cmn.PanicCrisis(cmn.Fmt("Could not unmarchal Trust Metric Store DB data: %v", err))
	}

	// Load every stored history into a fresh trust metric and recalculate
	for key, p := range peers {
		// NOTE(review): NewMetricWithConfig has already started the metric's
		// goroutine when the fields below are written; with a very short
		// interval length a tick could overlap with this restore — confirm.
		tm := NewMetricWithConfig(tms.config)

		// Restore the history, never exceeding the configured maximum size.
		// history[0] is the most recent value, so the newest entries are kept
		history := p.History
		if len(history) > tm.historyMaxSize {
			history = history[:tm.historyMaxSize]
		}

		// Restore the number of time intervals we have previously tracked
		intervals := p.NumIntervals
		if intervals > tm.maxIntervals {
			intervals = tm.maxIntervals
		}
		// calcHistoryValue reads history[floor(log2(i))] for every i below the
		// interval count, so n history values can back at most 2^n intervals.
		// Clamp inconsistent data instead of indexing out of range later
		if n := len(history); n < 31 && intervals > 1<<uint(n) {
			intervals = 1 << uint(n)
		}

		// Only replace the fresh metric's state when there is usable history.
		// A peer saved before its first interval completed keeps the perfect
		// history value (1.0) instead of being recalculated to 0
		if intervals > 0 && len(history) > 0 {
			tm.numIntervals = intervals
			tm.history = history
			tm.historySize = len(history)
			// Calculate the history value based on the loaded history data
			tm.historyValue = tm.calcHistoryValue()
		}

		// Load the peer trust metric into the store
		tms.peerMetrics[key] = tm
	}
	return true
}
// saveToDB writes the interval count and history of every peer metric to the
// store DB as one JSON document under trustMetricKey.
// An encoding failure is logged and nothing is written.
// The caller is expected to hold tms.mtx while the peer map is read.
func (tms *TrustMetricStore) saveToDB() {
	tms.Logger.Info("Saving TrustHistory to DB", "size", tms.size())

	// Build one record per peer, keyed by the peer key
	snapshot := make(map[string]peerHistoryJSON, len(tms.peerMetrics))
	for peerKey, metric := range tms.peerMetrics {
		var entry peerHistoryJSON
		entry.NumIntervals = metric.numIntervals
		entry.History = metric.history
		snapshot[peerKey] = entry
	}

	encoded, err := json.Marshal(snapshot)
	if err != nil {
		tms.Logger.Error("Failed to encode the TrustHistory", "err", err)
		return
	}

	// Write all the data back to the DB
	tms.db.SetSync(trustMetricKey, encoded)
}
//---------------------------------------------------------------------------------------
// Buffer sizes for the channels created by NewMetricWithConfig.
const (
	// defaultUpdateChanCapacity is the number of event updates that can be
	// queued on a single metric before senders block
	defaultUpdateChanCapacity = 10

	// defaultRequestChanCapacity is the number of trust value requests that
	// can be queued on a single metric before senders block
	defaultRequestChanCapacity = 10
)
// TrustMetric - keeps track of peer reliability
// See tendermint/docs/architecture/adr-006-trust-metric.md for details.
// NewMetricWithConfig starts a processRequests goroutine for every metric;
// that goroutine receives events, trust value requests and pause/stop
// signals over the channels declared at the end of this struct.
type TrustMetric struct {
// Determines the percentage given to current behavior
proportionalWeight float64
// Determines the percentage given to prior behavior
integralWeight float64
// Count of how many time intervals this metric has been tracking
numIntervals int
// Size of the time interval window for this trust metric
// (tracking window divided by interval length)
maxIntervals int
// The time duration for a single time interval
intervalLen time.Duration
// Stores the trust history data for this metric;
// new values are prepended, so index 0 holds the most recent one
history []float64
// The current number of history data elements
historySize int
// The maximum number of history data elements
historyMaxSize int
// The calculated history value for the current time interval;
// starts at 1.0 for a new metric
historyValue float64
// The number of recorded good and bad events for the current time interval
bad, good float64
// While true, history data is not modified
paused bool
// Sending true on this channel stops tracking, while false pauses tracking
stop chan bool
// For sending information about new good/bad events to be recorded
update chan *updateBadGood
// The channel to request a newly calculated trust value
trustValue chan *reqTrustValue
}
// For the TrustMetric update channel.
// One message reports a batch of either good or bad events.
type updateBadGood struct {
// True when the reported events are bad, false when they are good
IsBad bool
// Number of events added to the matching counter of the current interval
Add int
}
// For the TrustMetric trustValue channel.
// The requester creates Resp and reads the single answer from it.
type reqTrustValue struct {
// The requested trust value is sent back on this channel
Resp chan float64
}
// Pause asks the metric to stop recording history over time intervals until
// the next event arrives; any reported event unpauses the metric again.
// The request is delivered as false on the stop channel.
func (tm *TrustMetric) Pause() {
	const stopTracking = false
	tm.stop <- stopTracking
}
// Stop asks the metric to end all tracking for good.
// The request is delivered as true on the stop channel.
func (tm *TrustMetric) Stop() {
	const stopTracking = true
	tm.stop <- stopTracking
}
// BadEvent reports that a single undesirable event took place.
// The report is queued on the update channel; the call blocks while that
// channel's buffer is full.
func (tm *TrustMetric) BadEvent() {
	event := &updateBadGood{Add: 1, IsBad: true}
	tm.update <- event
}
// AddBadEvents reports num undesirable events in one message.
// The report is queued on the update channel; the call blocks while that
// channel's buffer is full.
func (tm *TrustMetric) AddBadEvents(num int) {
	event := &updateBadGood{Add: num, IsBad: true}
	tm.update <- event
}
// GoodEvent reports that a single desirable event took place.
// The report is queued on the update channel; the call blocks while that
// channel's buffer is full.
func (tm *TrustMetric) GoodEvent() {
	event := &updateBadGood{Add: 1, IsBad: false}
	tm.update <- event
}
// AddGoodEvents reports num desirable events in one message.
// The report is queued on the update channel; the call blocks while that
// channel's buffer is full.
func (tm *TrustMetric) AddGoodEvents(num int) {
	event := &updateBadGood{Add: num, IsBad: false}
	tm.update <- event
}
// TrustValue gets the dependable trust value; always between 0 and 1.
// It sends a request to the metric's goroutine and waits for the freshly
// calculated value on the request's response channel.
func (tm *TrustMetric) TrustValue() float64 {
	request := &reqTrustValue{Resp: make(chan float64, 1)}
	tm.trustValue <- request
	return <-request.Resp
}
// TrustScore gets a score based on the trust value always between 0 and 100.
// The score is the current trust value multiplied by 100 and rounded down.
func (tm *TrustMetric) TrustScore() int {
	return int(math.Floor(tm.TrustValue() * 100))
}
// TrustMetricConfig - Configures the weight functions and time intervals for the metric.
// Fields that are zero or negative are replaced by the DefaultConfig value
// when a metric is created (see customConfig).
type TrustMetricConfig struct {
// Determines the percentage given to current behavior
ProportionalWeight float64
// Determines the percentage given to prior behavior
IntegralWeight float64
// The window of time that the trust metric will track events across.
// This can be set to cover many days without issue
TrackingWindow time.Duration
// Each interval should be short for adaptability.
// Less than 30 seconds is too sensitive,
// and greater than 5 minutes will make the metric numb
IntervalLength time.Duration
}
// DefaultConfig returns a config with values that have been tested and
// produce desirable results: weights of 0.4 (current behavior) and 0.6
// (prior behavior), a 14 day tracking window and one minute intervals.
func DefaultConfig() TrustMetricConfig {
	var cfg TrustMetricConfig
	cfg.ProportionalWeight = 0.4
	cfg.IntegralWeight = 0.6
	cfg.TrackingWindow = 14 * 24 * time.Hour // 14 days.
	cfg.IntervalLength = time.Minute
	return cfg
}
// NewMetric creates a trust metric that uses the values from DefaultConfig.
func NewMetric() *TrustMetric {
	defaults := DefaultConfig()
	return NewMetricWithConfig(defaults)
}
// NewMetricWithConfig returns a trust metric with a custom configuration.
// The supplied values are first passed through customConfig. The metric
// starts with a perfect history value of 1.0, and its processRequests
// goroutine is started before the metric is returned.
func NewMetricWithConfig(tmc TrustMetricConfig) *TrustMetric {
	cfg := customConfig(tmc)

	// The maximum number of time intervals is the tracking window / interval length
	windowIntervals := int(cfg.TrackingWindow / cfg.IntervalLength)

	metric := &TrustMetric{
		proportionalWeight: cfg.ProportionalWeight,
		integralWeight:     cfg.IntegralWeight,
		intervalLen:        cfg.IntervalLength,
		maxIntervals:       windowIntervals,
		// The history size is determined by the maximum number of time intervals
		historyMaxSize: intervalToHistoryIndex(windowIntervals) + 1,
		// This metric has a perfect history so far
		historyValue: 1.0,
		update:       make(chan *updateBadGood, defaultUpdateChanCapacity),
		trustValue:   make(chan *reqTrustValue, defaultRequestChanCapacity),
		stop:         make(chan bool, 2),
	}

	go metric.processRequests()
	return metric
}
/* Private methods */
// customConfig ensures that all configuration elements have valid values.
// It starts from DefaultConfig and takes over every positive value of tmc.
// A tracking window is only accepted when it is at least one interval long.
// The returned config always has TrackingWindow >= IntervalLength, so at
// least one interval fits into the window.
func customConfig(tmc TrustMetricConfig) TrustMetricConfig {
	config := DefaultConfig()

	// Check the config for set values, and setup appropriately
	if tmc.ProportionalWeight > 0 {
		config.ProportionalWeight = tmc.ProportionalWeight
	}
	if tmc.IntegralWeight > 0 {
		config.IntegralWeight = tmc.IntegralWeight
	}
	if tmc.IntervalLength > 0 {
		config.IntervalLength = tmc.IntervalLength
	}
	if tmc.TrackingWindow > 0 && tmc.TrackingWindow >= config.IntervalLength {
		config.TrackingWindow = tmc.TrackingWindow
	}

	// An overridden interval can be longer than the default window when no
	// usable window was supplied. Widen the window to one interval so the
	// metric never ends up with zero intervals
	if config.TrackingWindow < config.IntervalLength {
		config.TrackingWindow = config.IntervalLength
	}
	return config
}
// derivativeValue calculates the derivative component: the difference
// between the current proportional value and the stored history value.
func (tm *TrustMetric) derivativeValue() float64 {
	current := tm.proportionalValue()
	return current - tm.historyValue
}
// weightedDerivative strengthens the derivative component when the change is
// negative: a negative derivative is weighted with 1.0, any other value with 0.
func (tm *TrustMetric) weightedDerivative() float64 {
	delta := tm.derivativeValue()

	gamma := 0.0
	if delta < 0 {
		gamma = 1.0
	}
	return gamma * delta
}
// intervalToHistoryIndex maps the interval value down to an actual history
// index, which is floor(log2(interval)).
// Intervals of zero or less map to index 0. The result is computed with
// integer shifts, so it is exact for every positive interval and never
// depends on floating point rounding or on converting -Inf/NaN to int.
func intervalToHistoryIndex(interval int) int {
	if interval <= 0 {
		return 0
	}

	index := 0
	// Count how often the value can be halved before reaching 1
	for n := interval; n > 1; n >>= 1 {
		index++
	}
	return index
}
// fadedMemoryValue retrieves the history data value that represents the
// requested time interval. Interval 0 is the base case and always reads the
// first history element; every other interval is mapped to its history index
// by intervalToHistoryIndex.
func (tm *TrustMetric) fadedMemoryValue(interval int) float64 {
	index := 0
	if interval != 0 {
		index = intervalToHistoryIndex(interval)
	}
	return tm.history[index]
}
// updateFadedMemory performs the update for our Faded Memories process, which
// allows the trust metric tracking window to be large while maintaining a
// small number of history data values.
// The most recent element (index 0) is kept as is. Every older element i is
// merged with its newer neighbour using the weight 2^i. The update happens
// in place from newest to oldest, so each merge uses the already merged
// value of the neighbour before it.
func (tm *TrustMetric) updateFadedMemory() {
	if tm.historySize < 2 {
		return
	}

	for idx := 1; idx < tm.historySize; idx++ {
		// The older the data is, the more we spread it out
		spread := math.Pow(2, float64(idx))
		// Two history data values are merged into a single value
		tm.history[idx] = ((tm.history[idx] * (spread - 1)) + tm.history[idx-1]) / spread
	}
	tm.history = tm.history[:tm.historySize]
}
// calcHistoryValue calculates the integral (history) component of the trust
// value: a weighted average of the faded memory values of all tracked
// intervals. Interval i gets the weight 0.8^(i+1), normalised by the sum of
// all weights. With no tracked intervals the result is 0.
func (tm *TrustMetric) calcHistoryValue() float64 {
	intervals := tm.numIntervals

	// Create the weights and their sum in one pass
	var (
		weights []float64
		total   float64
	)
	for k := 1; k <= intervals; k++ {
		w := math.Pow(.8, float64(k)) // Optimistic weight
		weights = append(weights, w)
		total += w
	}

	// Accumulate the normalised, weighted history values
	var value float64
	for k, w := range weights {
		value += tm.fadedMemoryValue(k) * (w / total)
	}
	return value
}
// proportionalValue calculates the current score for good/bad experiences.
// Without any recorded events the score is a perfect 1.0. Otherwise it is
// good / (good + bad^2), so bad events are worth more than good ones.
func (tm *TrustMetric) proportionalValue() float64 {
	if !(tm.bad > 0 || tm.good > 0) {
		return 1.0
	}

	// Bad events are squared to give them more weight in the score
	weighted := tm.good + math.Pow(tm.bad, 2)
	return tm.good / weighted
}
// calcTrustValue calculates the trust value for the request processing.
// It adds the weighted proportional value, the weighted history value and
// the weighted derivative. A negative sum is reported as 0.
func (tm *TrustMetric) calcTrustValue() float64 {
	proportional := tm.proportionalWeight * tm.proportionalValue()
	integral := tm.integralWeight * tm.historyValue
	derivative := tm.weightedDerivative()

	score := proportional + integral + derivative
	// Do not return a negative value.
	if score < 0 {
		return 0
	}
	return score
}
// processRequests is the loop of the goroutine that handles all requests on
// the metric. It serves four kinds of input until a stop signal arrives:
//   - event updates, which adjust the good/bad counters and unpause the metric
//   - trust value requests, which are answered with calcTrustValue
//   - interval ticks, which move the current trust value into the history
//   - pause (false) and stop (true) signals on the stop channel
func (tm *TrustMetric) processRequests() {
	ticker := time.NewTicker(tm.intervalLen)
	defer ticker.Stop()

	for {
		select {
		case event := <-tm.update:
			// The first event after a pause starts from clean counters
			// and causes us to unpause the metric
			if tm.paused {
				tm.good, tm.bad = 0, 0
				tm.paused = false
			}

			amount := float64(event.Add)
			if event.IsBad {
				tm.bad += amount
			} else {
				tm.good += amount
			}

		case request := <-tm.trustValue:
			request.Resp <- tm.calcTrustValue()

		case <-ticker.C:
			if tm.paused {
				// History data is not modified while paused
				continue
			}

			// Add the current trust value to the front of the history data
			latest := tm.calcTrustValue()
			tm.history = append([]float64{latest}, tm.history...)

			// Update history and interval counters
			if tm.historySize < tm.historyMaxSize {
				tm.historySize++
			} else {
				tm.history = tm.history[:tm.historyMaxSize]
			}
			if tm.numIntervals < tm.maxIntervals {
				tm.numIntervals++
			}

			// Update the history data using Faded Memories
			tm.updateFadedMemory()
			// Calculate the history value for the upcoming time interval
			tm.historyValue = tm.calcHistoryValue()
			tm.good, tm.bad = 0, 0

		case halt := <-tm.stop:
			if halt {
				// Stop all further tracking for this metric
				return
			}
			// Pause the metric for now
			tm.paused = true
		}
	}
}