From b1908f6a16e712d4059195db3f22a0b856907bdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 15 Feb 2016 20:31:58 +0200 Subject: [PATCH 1/2] psp/discovery: NTP sanity check clock drift in case of expirations --- p2p/discover/ntp.go | 99 +++++++++++++++++++++++++++++++++++++++++++++ p2p/discover/udp.go | 33 +++++++++++++-- 2 files changed, 128 insertions(+), 4 deletions(-) create mode 100644 p2p/discover/ntp.go diff --git a/p2p/discover/ntp.go b/p2p/discover/ntp.go new file mode 100644 index 000000000..a87ee1fbe --- /dev/null +++ b/p2p/discover/ntp.go @@ -0,0 +1,99 @@ +// Copyright 2016 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Contains the NTP time drift detection via the SNTP protocol: +// https://tools.ietf.org/html/rfc4330 + +package discover + +import ( + "net" + "sort" + "time" +) + +// ntpPool is the NTP server to query for the current time +const ntpPool = "pool.ntp.org" + +// durationSlice attaches the methods of sort.Interface to []time.Duration, +// sorting in increasing order. +type durationSlice []time.Duration + +func (s durationSlice) Len() int { return len(s) } +func (s durationSlice) Less(i, j int) bool { return s[i] < s[j] } +func (s durationSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +// sntpDrift does a naive time resolution against an NTP server and returns the +// measured drift. This method uses the simple version of NTP. It's not precise +// but should be fine for these purposes. +// +// Note, it executes two extra measurements compared to the number of requested +// ones to be able to discard the two extremes as outliers. +func sntpDrift(measurements int) (time.Duration, error) { + // Resolve the address of the NTP server + addr, err := net.ResolveUDPAddr("udp", ntpPool+":123") + if err != nil { + return 0, err + } + // Construct the time request (empty package with only 2 fields set): + // Bits 3-5: Protocol version, 3 + // Bits 6-8: Mode of operation, client, 3 + request := make([]byte, 48) + request[0] = 3<<3 | 3 + + // Execute each of the measurements + drifts := []time.Duration{} + for i := 0; i < measurements+2; i++ { + // Dial the NTP server and send the time retrieval request + conn, err := net.DialUDP("udp", nil, addr) + if err != nil { + return 0, err + } + defer conn.Close() + + sent := time.Now() + if _, err = conn.Write(request); err != nil { + return 0, err + } + // Retrieve the reply and calculate the elapsed time + conn.SetDeadline(time.Now().Add(5 * time.Second)) + + reply := make([]byte, 48) + if _, err = conn.Read(reply); err != nil { + return 0, err + } + elapsed := time.Since(sent) + + // Reconstruct the time from the reply data + sec := uint64(reply[43]) | uint64(reply[42])<<8 | uint64(reply[41])<<16 | uint64(reply[40])<<24 + frac := uint64(reply[47]) | uint64(reply[46])<<8 | uint64(reply[45])<<16 | uint64(reply[44])<<24 + + nanosec := sec*1e9 + (frac*1e9)>>32 + + t := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(nanosec)).Local() + + // Calculate the drift based on an assumed answer time of RRT/2 + drifts = append(drifts, sent.Sub(t)+elapsed/2) + } + // Calculate average drif (drop two extremities to avoid outliers) + sort.Sort(durationSlice(drifts)) + + drift := time.Duration(0) + for i := 1; i < len(drifts)-1; i++ { + drift += drifts[i] + } + return drift / time.Duration(measurements), nil +} diff --git a/p2p/discover/udp.go b/p2p/discover/udp.go index 81674f552..fa4bc07dc 100644 --- a/p2p/discover/udp.go +++ b/p2p/discover/udp.go @@ -51,6 +51,9 @@ const ( respTimeout = 500 * time.Millisecond sendTimeout = 500 * time.Millisecond expiration = 20 * time.Second + + ntpThreshold = 32 // Continuous timeouts after which to check NTP + driftThreshold = 10 * time.Second // Allowed clock drift before warning user ) // RPC packet types @@ -316,13 +319,14 @@ func (t *udp) handleReply(from NodeID, ptype byte, req packet) bool { } } -// loop runs in its own goroutin. it keeps track of +// loop runs in its own goroutine. it keeps track of // the refresh timer and the pending reply queue. func (t *udp) loop() { var ( - plist = list.New() - timeout = time.NewTimer(0) - nextTimeout *pending // head of plist when timeout was last reset + plist = list.New() + timeout = time.NewTimer(0) + nextTimeout *pending // head of plist when timeout was last reset + contTimeouts = 0 // number of continuous timeouts to do NTP checks ) <-timeout.C // ignore first timeout defer timeout.Stop() @@ -377,20 +381,41 @@ func (t *udp) loop() { p.errc <- nil plist.Remove(el) } + // Reset the continuous timeout counter (time drift detection) + contTimeouts = 0 } } r.matched <- matched case now := <-timeout.C: nextTimeout = nil + // Notify and remove callbacks whose deadline is in the past. for el := plist.Front(); el != nil; el = el.Next() { p := el.Value.(*pending) if now.After(p.deadline) || now.Equal(p.deadline) { p.errc <- errTimeout plist.Remove(el) + contTimeouts++ } } + // If we've accumulated too many timeouts, do an NTP time sync check + if contTimeouts > ntpThreshold { + go func() { + drift, err := sntpDrift(3) + switch { + case err != nil: + glog.V(logger.Warn).Infof("No UDP connectivity, maybe blocked by firewall? (%v)", err) + + case drift < -driftThreshold || drift > driftThreshold: + glog.V(logger.Warn).Infof("System clock seems off by %v, which can prevent network connectivity", drift) + + default: + glog.V(logger.Debug).Infof("Sanity NTP check reported %v drift, all ok", drift) + } + }() + contTimeouts = 0 + } } } } From ac954f48bd9f9897f3118782ff2e68eeb3e1ed4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 19 Feb 2016 16:18:55 +0200 Subject: [PATCH 2/2] p2p/discover: emphasize warning, add 10 min cooldown --- p2p/discover/ntp.go | 32 ++++++++++++++++++++++++++++++-- p2p/discover/udp.go | 25 +++++++++---------------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/p2p/discover/ntp.go b/p2p/discover/ntp.go index a87ee1fbe..c1a4b3af1 100644 --- a/p2p/discover/ntp.go +++ b/p2p/discover/ntp.go @@ -20,13 +20,20 @@ package discover import ( + "fmt" "net" "sort" + "strings" "time" + + "github.com/ethereum/go-ethereum/logger" + "github.com/ethereum/go-ethereum/logger/glog" ) -// ntpPool is the NTP server to query for the current time -const ntpPool = "pool.ntp.org" +const ( + ntpPool = "pool.ntp.org" // ntpPool is the NTP server to query for the current time + ntpChecks = 3 // Number of measurements to do against the NTP server +) // durationSlice attaches the methods of sort.Interface to []time.Duration, // sorting in increasing order. @@ -36,6 +43,27 @@ func (s durationSlice) Len() int { return len(s) } func (s durationSlice) Less(i, j int) bool { return s[i] < s[j] } func (s durationSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +// checkClockDrift queries an NTP server for clock drifts and warns the user if +// one large enough is detected. +func checkClockDrift() { + drift, err := sntpDrift(ntpChecks) + if err != nil { + return + } + if drift < -driftThreshold || drift > driftThreshold { + warning := fmt.Sprintf("System clock seems off by %v, which can prevent network connectivity", drift) + howtofix := fmt.Sprintf("Please enable network time synchronisation in system settings") + separator := strings.Repeat("-", len(warning)) + + glog.V(logger.Warn).Info(separator) + glog.V(logger.Warn).Info(warning) + glog.V(logger.Warn).Info(howtofix) + glog.V(logger.Warn).Info(separator) + } else { + glog.V(logger.Debug).Infof("Sanity NTP check reported %v drift, all ok", drift) + } +} + // sntpDrift does a naive time resolution against an NTP server and returns the // measured drift. This method uses the simple version of NTP. It's not precise // but should be fine for these purposes. diff --git a/p2p/discover/udp.go b/p2p/discover/udp.go index fa4bc07dc..03fa0c348 100644 --- a/p2p/discover/udp.go +++ b/p2p/discover/udp.go @@ -52,8 +52,9 @@ const ( sendTimeout = 500 * time.Millisecond expiration = 20 * time.Second - ntpThreshold = 32 // Continuous timeouts after which to check NTP - driftThreshold = 10 * time.Second // Allowed clock drift before warning user + ntpFailureThreshold = 32 // Continuous timeouts after which to check NTP + ntpWarningCooldown = 10 * time.Minute // Minimum amount of time to pass before repeating NTP warning + driftThreshold = 10 * time.Second // Allowed clock drift before warning user ) // RPC packet types @@ -327,6 +328,7 @@ func (t *udp) loop() { timeout = time.NewTimer(0) nextTimeout *pending // head of plist when timeout was last reset contTimeouts = 0 // number of continuous timeouts to do NTP checks + ntpWarnTime = time.Unix(0, 0) ) <-timeout.C // ignore first timeout defer timeout.Stop() @@ -400,20 +402,11 @@ func (t *udp) loop() { } } // If we've accumulated too many timeouts, do an NTP time sync check - if contTimeouts > ntpThreshold { - go func() { - drift, err := sntpDrift(3) - switch { - case err != nil: - glog.V(logger.Warn).Infof("No UDP connectivity, maybe blocked by firewall? (%v)", err) - - case drift < -driftThreshold || drift > driftThreshold: - glog.V(logger.Warn).Infof("System clock seems off by %v, which can prevent network connectivity", drift) - - default: - glog.V(logger.Debug).Infof("Sanity NTP check reported %v drift, all ok", drift) - } - }() + if contTimeouts > ntpFailureThreshold { + if time.Since(ntpWarnTime) >= ntpWarningCooldown { + ntpWarnTime = time.Now() + go checkClockDrift() + } contTimeouts = 0 } }