Merge pull request #2218 from karalabe/time-drift-warning

p2p/discover: NTP sanity check clock drift in case of expirations
This commit is contained in:
Péter Szilágyi 2016-02-24 13:12:52 +02:00
commit 1415669ac3
2 changed files with 149 additions and 4 deletions

127
p2p/discover/ntp.go Normal file
View File

@ -0,0 +1,127 @@
// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Contains the NTP time drift detection via the SNTP protocol:
// https://tools.ietf.org/html/rfc4330
package discover
import (
"fmt"
"net"
"sort"
"strings"
"time"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
// Parameters of the NTP based clock drift sanity check.
const (
ntpPool = "pool.ntp.org" // Host name of the NTP server to query for the current time (dialed on UDP port 123)
ntpChecks = 3 // Number of measurements requested from sntpDrift (which does two extra ones to drop outliers)
)
// durationSlice is a []time.Duration that satisfies sort.Interface, ordering
// its elements from the shortest duration to the longest.
type durationSlice []time.Duration

// Len reports how many durations the slice holds.
func (s durationSlice) Len() int {
	return len(s)
}

// Less reports whether the duration at index i is strictly shorter than the
// one at index j.
func (s durationSlice) Less(i, j int) bool {
	return s[j] > s[i]
}

// Swap exchanges the durations stored at indexes i and j.
func (s durationSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// checkClockDrift queries an NTP server for clock drifts and warns the user if
// one large enough is detected.
//
// The drift is measured via sntpDrift using ntpChecks measurements. If its
// magnitude exceeds driftThreshold, a framed warning is logged at warning level,
// otherwise the measured value is only reported at debug level. The check is
// best effort: if the measurement itself fails, the failure is logged at debug
// level and no warning is issued.
func checkClockDrift() {
	drift, err := sntpDrift(ntpChecks)
	if err != nil {
		// Don't fail silently, otherwise a broken check is indistinguishable
		// from one that never ran.
		glog.V(logger.Debug).Infof("Sanity NTP check failed: %v", err)
		return
	}
	// Clock is within the accepted bounds, nothing to warn about
	if drift >= -driftThreshold && drift <= driftThreshold {
		glog.V(logger.Debug).Infof("Sanity NTP check reported %v drift, all ok", drift)
		return
	}
	// Drift is too large, print a framed warning sized to the longest line
	warning := fmt.Sprintf("System clock seems off by %v, which can prevent network connectivity", drift)
	howtofix := "Please enable network time synchronisation in system settings"
	separator := strings.Repeat("-", len(warning))

	glog.V(logger.Warn).Info(separator)
	glog.V(logger.Warn).Info(warning)
	glog.V(logger.Warn).Info(howtofix)
	glog.V(logger.Warn).Info(separator)
}
// sntpDrift does a naive time resolution against an NTP server and returns the
// measured drift. This method uses the simple version of NTP. It's not precise
// but should be fine for these purposes.
//
// The returned drift is the local clock minus the server clock, i.e. it is
// positive when the local clock is ahead of the NTP server.
//
// Note, it executes two extra measurements compared to the number of requested
// ones to be able to discard the two extremes as outliers. The remaining ones
// are averaged.
//
// An error is returned if measurements is not positive, if the NTP server's
// address cannot be resolved, or if any single measurement fails (dial, send,
// timeout after 5 seconds, or a truncated reply).
func sntpDrift(measurements int) (time.Duration, error) {
	// Reject non-positive counts up front, the final average divides by it
	if measurements <= 0 {
		return 0, fmt.Errorf("invalid number of NTP measurements: %d", measurements)
	}
	// Resolve the address of the NTP server
	addr, err := net.ResolveUDPAddr("udp", ntpPool+":123")
	if err != nil {
		return 0, err
	}
	// Construct the time request (empty package with only 2 fields set):
	//   Bits 3-5: Protocol version, 3
	//   Bits 6-8: Mode of operation, client, 3
	request := make([]byte, 48)
	request[0] = 3<<3 | 3

	// measure executes a single request/reply round trip and returns the drift
	// it observed. It is a separate function so that the deferred Close runs at
	// the end of every measurement instead of piling up until sntpDrift returns.
	measure := func() (time.Duration, error) {
		// Dial the NTP server and send the time retrieval request
		conn, err := net.DialUDP("udp", nil, addr)
		if err != nil {
			return 0, err
		}
		defer conn.Close()

		// Bound the whole exchange so a lost packet cannot block forever
		if err = conn.SetDeadline(time.Now().Add(5 * time.Second)); err != nil {
			return 0, err
		}
		sent := time.Now()
		if _, err = conn.Write(request); err != nil {
			return 0, err
		}
		// Retrieve the reply and calculate the elapsed time
		reply := make([]byte, 48)
		n, err := conn.Read(reply)
		if err != nil {
			return 0, err
		}
		elapsed := time.Since(sent)

		// A truncated reply would be decoded from a zero filled buffer and be
		// reported as a gigantic drift, so refuse it instead.
		if n < len(reply) {
			return 0, fmt.Errorf("short NTP reply: %d bytes, want %d", n, len(reply))
		}
		// Reconstruct the time from the reply data (transmit timestamp, see
		// RFC 4330: big endian 32 bit seconds since 1900 + 32 bit fraction)
		sec := uint64(reply[43]) | uint64(reply[42])<<8 | uint64(reply[41])<<16 | uint64(reply[40])<<24
		frac := uint64(reply[47]) | uint64(reply[46])<<8 | uint64(reply[45])<<16 | uint64(reply[44])<<24

		nanosec := sec*1e9 + (frac*1e9)>>32

		t := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(nanosec)).Local()

		// Calculate the drift based on an assumed answer time of RTT/2
		return sent.Sub(t) + elapsed/2, nil
	}
	// Execute each of the measurements
	drifts := make([]time.Duration, 0, measurements+2)
	for i := 0; i < measurements+2; i++ {
		drift, err := measure()
		if err != nil {
			return 0, err
		}
		drifts = append(drifts, drift)
	}
	// Calculate average drift (drop two extremities to avoid outliers)
	sort.Sort(durationSlice(drifts))

	var drift time.Duration
	for _, d := range drifts[1 : len(drifts)-1] {
		drift += d
	}
	return drift / time.Duration(measurements), nil
}

View File

@ -51,6 +51,10 @@ const (
respTimeout = 500 * time.Millisecond
sendTimeout = 500 * time.Millisecond
expiration = 20 * time.Second
ntpFailureThreshold = 32 // Continuous timeouts after which to check NTP
ntpWarningCooldown = 10 * time.Minute // Minimum amount of time to pass before repeating NTP warning
driftThreshold = 10 * time.Second // Allowed clock drift before warning user
)
// RPC packet types
@ -316,13 +320,15 @@ func (t *udp) handleReply(from NodeID, ptype byte, req packet) bool {
}
}
// loop runs in its own goroutin. it keeps track of
// loop runs in its own goroutine. it keeps track of
// the refresh timer and the pending reply queue.
func (t *udp) loop() {
var (
plist = list.New()
timeout = time.NewTimer(0)
nextTimeout *pending // head of plist when timeout was last reset
plist = list.New()
timeout = time.NewTimer(0)
nextTimeout *pending // head of plist when timeout was last reset
contTimeouts = 0 // number of continuous timeouts to do NTP checks
ntpWarnTime = time.Unix(0, 0)
)
<-timeout.C // ignore first timeout
defer timeout.Stop()
@ -377,20 +383,32 @@ func (t *udp) loop() {
p.errc <- nil
plist.Remove(el)
}
// Reset the continuous timeout counter (time drift detection)
contTimeouts = 0
}
}
r.matched <- matched
case now := <-timeout.C:
nextTimeout = nil
// Notify and remove callbacks whose deadline is in the past.
for el := plist.Front(); el != nil; el = el.Next() {
p := el.Value.(*pending)
if now.After(p.deadline) || now.Equal(p.deadline) {
p.errc <- errTimeout
plist.Remove(el)
contTimeouts++
}
}
// If we've accumulated too many timeouts, do an NTP time sync check
if contTimeouts > ntpFailureThreshold {
if time.Since(ntpWarnTime) >= ntpWarningCooldown {
ntpWarnTime = time.Now()
go checkClockDrift()
}
contTimeouts = 0
}
}
}
}