From b7305f78b6185b4f46cd69dac93e42f324542a4b Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 20 Apr 2020 10:40:39 -0400 Subject: [PATCH 01/12] Properly remove the IP when removing the connection --- networking/connections.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/networking/connections.go b/networking/connections.go index 6226e01..239d814 100644 --- a/networking/connections.go +++ b/networking/connections.go @@ -187,15 +187,18 @@ func (c *connections) getID(peer salticidae.PeerID) (ids.ShortID, bool) { } func (c *connections) remove(peer salticidae.PeerID, id ids.ShortID) { - c.removePeerID(peer) c.removeID(id) + c.removePeerID(peer) } func (c *connections) removePeerID(peer salticidae.PeerID) { peerID := toID(peer) if id, exists := c.peerIDToID[peerID]; exists { + idKey := id.Key() + delete(c.peerIDToID, peerID) - delete(c.idToPeerID, id.Key()) + delete(c.idToPeerID, idKey) + delete(c.idToIP, idKey) } } @@ -204,6 +207,7 @@ func (c *connections) removeID(id ids.ShortID) { if peer, exists := c.idToPeerID[idKey]; exists { delete(c.peerIDToID, toID(peer)) delete(c.idToPeerID, idKey) + delete(c.idToIP, idKey) } } From b4f3bb6719664f4246caa253269cec0738512013 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 20 Apr 2020 10:42:16 -0400 Subject: [PATCH 02/12] reduce diff --- networking/connections.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/networking/connections.go b/networking/connections.go index 239d814..4eaaaf7 100644 --- a/networking/connections.go +++ b/networking/connections.go @@ -187,8 +187,8 @@ func (c *connections) getID(peer salticidae.PeerID) (ids.ShortID, bool) { } func (c *connections) remove(peer salticidae.PeerID, id ids.ShortID) { - c.removeID(id) c.removePeerID(peer) + c.removeID(id) } func (c *connections) removePeerID(peer salticidae.PeerID) { From 6bcedf1d64e2cac5bb8e6d267b2b33a54b0f2ce4 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 20 Apr 2020 20:00:36 -0400 Subject: [PATCH 03/12] wip --- networking/handshake_handlers.go | 140 +++++++++++++++----------- node/node.go | 6 +- scripts/ansible/inventory.yml | 33 +++--- scripts/ansible/ping_playbook.yml | 11 ++ scripts/ansible/restart_playbook.yml | 2 +- scripts/ansible/update_playbook.yml | 4 +- snow/engine/avalanche/bootstrapper.go | 1 + snow/engine/avalanche/voter.go | 19 +++- 8 files changed, 132 insertions(+), 84 deletions(-) create mode 100755 scripts/ansible/ping_playbook.yml diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index bfa15ee..94ac481 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -74,6 +74,10 @@ const ( // PeerListStakerGossipFraction calculates the fraction of stakers that are // gossiped to. If set to 1, then only stakers will be gossiped to. PeerListStakerGossipFraction = 2 + + // ConnectTimeout is the amount of time to wait before attempt to connect to + // an unknown peer + ConnectTimeout = time.Second // GetVersionTimeout is the amount of time to wait before sending a // getVersion message to a partially connected peer GetVersionTimeout = 2 * time.Second @@ -96,27 +100,37 @@ var ( type Handshake struct { handshakeMetrics - networkID uint32 + networkID uint32 // ID of the network I'm running, used to prevent connecting to the wrong network log logging.Logger - vdrs validators.Set - myAddr salticidae.NetAddr - myID ids.ShortID - net salticidae.PeerNetwork - enableStaking bool // Should only be false for local tests + vdrs validators.Set // set of current validators in the AVAnet + myAddr salticidae.NetAddr // IP I communicate to peers + myID ids.ShortID // ID that identifies myself as a staker or not + net salticidae.PeerNetwork // C messaging network + enableStaking bool // Should only be false for local tests - clock timer.Clock - pending Connections // Connections that I haven't gotten version messages from - connections Connections // Connections that I think are connected + clock timer.Clock - versionTimeout timer.TimeoutManager + // Connections that I have added by IP, but haven't gotten an ID from + requestedLock sync.Mutex + requested map[string]struct{} + requestedTimeout timer.TimeoutManager + + // Connections that I have added as a peer, but haven't gotten a version + // message from + pending Connections + versionTimeout timer.TimeoutManager + + // Connections that I have gotten a valid version message from + connections Connections reconnectTimeout timer.TimeoutManager + + // IPs of nodes I'm connected to will be repeatedly gossiped throughout the network peerListGossiper *timer.Repeater + // If any chain is blocked on connecting to peers, track these blockers here awaitingLock sync.Mutex awaiting []*networking.AwaitingConnections - - requestedConnections map[string]struct{} } // Initialize to the c networking library. This should only be done once during @@ -132,19 +146,34 @@ func (nm *Handshake) Initialize( networkID uint32, ) { log.AssertTrue(nm.net == nil, "Should only register network handlers once") + + nm.handshakeMetrics.Initialize(log, registerer) + + nm.networkID = networkID + nm.log = log nm.vdrs = vdrs nm.myAddr = myAddr nm.myID = myID nm.net = peerNet nm.enableStaking = enableStaking - nm.networkID = networkID + + nm.requested = make(map[string]struct{}) + nm.requestedTimeout.Initialize(ConnectTimeout) + go nm.log.RecoverAndPanic(nm.requestedTimeout.Dispatch) nm.pending = NewConnections() + nm.versionTimeout.Initialize(GetVersionTimeout) + go nm.log.RecoverAndPanic(nm.versionTimeout.Dispatch) + nm.connections = NewConnections() + nm.reconnectTimeout.Initialize(ReconnectTimeout) + go nm.log.RecoverAndPanic(nm.reconnectTimeout.Dispatch) - nm.requestedConnections = make(map[string]struct{}) + nm.peerListGossiper = timer.NewRepeater(nm.gossipPeerList, PeerListGossipSpacing) + go nm.log.RecoverAndPanic(nm.peerListGossiper.Dispatch) + // register c message callbacks net := peerNet.AsMsgNetwork() net.RegConnHandler(salticidae.MsgNetworkConnCallback(C.connHandler), nil) @@ -156,30 +185,41 @@ func (nm *Handshake) Initialize( net.RegHandler(Version, salticidae.MsgNetworkMsgCallback(C.version), nil) net.RegHandler(GetPeerList, salticidae.MsgNetworkMsgCallback(C.getPeerList), nil) net.RegHandler(PeerList, salticidae.MsgNetworkMsgCallback(C.peerList), nil) +} - nm.handshakeMetrics.Initialize(nm.log, registerer) +// ConnectTo add the peer as a connection and connects to them. Will free peer +func (nm *Handshake) ConnectTo(peer salticidae.PeerID, addr salticidae.NetAddr) { + if !nm.pending.ContainsPeerID(peer) && !nm.connections.ContainsPeerID(peer) { + HandshakeNet.net.AddPeer(peer) + HandshakeNet.net.SetPeerAddr(peer, addr) + HandshakeNet.net.ConnPeer(peer, 600, 1) - nm.versionTimeout.Initialize(GetVersionTimeout) - go nm.log.RecoverAndPanic(nm.versionTimeout.Dispatch) + // TODO: Should add the peer to the pending set, register a timeout to + // remove the peer + } - nm.reconnectTimeout.Initialize(ReconnectTimeout) - go nm.log.RecoverAndPanic(nm.reconnectTimeout.Dispatch) - - nm.peerListGossiper = timer.NewRepeater(nm.gossipPeerList, PeerListGossipSpacing) - go nm.log.RecoverAndPanic(nm.peerListGossiper.Dispatch) + peer.Free() } // Connect ... func (nm *Handshake) Connect(addr salticidae.NetAddr) { + ip := toIPDesc(addr) + ipStr := ip.String() + if nm.pending.ContainsIP(ip) || nm.connections.ContainsIP(ip) { + return + } + + nm.log.Debug("Adding peer %s", ip) + + //TODO: + if !nm.enableStaking { peer := salticidae.NewPeerIDFromNetAddr(addr, false) - nm.net.AddPeer(peer) - nm.net.SetPeerAddr(peer, addr) - nm.net.ConnPeer(peer, 600, 1) - peer.Free() + nm.ConnectTo(peer, addr) } else { - ip := toIPDesc(addr) - nm.requestedConnections[ip.String()] = struct{}{} + nm.requestedConnections[ipStr] = struct{}{} + + nm.log.Verbo("Attempting to discover peer at %s", ipStr) msgNet := nm.net.AsMsgNetwork() msgNet.Connect(addr) @@ -344,12 +384,7 @@ func connHandler(_conn *C.struct_msgnetwork_conn_t, connected C.bool, _ unsafe.P cert := conn.GetPeerCert() peer := salticidae.NewPeerIDFromX509(cert, false) - - HandshakeNet.net.AddPeer(peer) - HandshakeNet.net.SetPeerAddr(peer, addr) - HandshakeNet.net.ConnPeer(peer, 600, 1) - - peer.Free() + HandshakeNet.ConnectTo(peer, addr) return true } @@ -388,14 +423,14 @@ func (nm *Handshake) disconnectedFromPeer(peer salticidae.PeerID) { cert := ids.ShortID{} if pendingCert, exists := nm.pending.GetID(peer); exists { cert = pendingCert + nm.log.Info("Disconnected from pending peer %s", cert) } else if connectedCert, exists := nm.connections.GetID(peer); exists { cert = connectedCert + nm.log.Info("Disconnected from peer %s", cert) } else { return } - nm.log.Info("Disconnected from %s", cert) - peerBytes := toID(peer) peerID := ids.NewID(peerBytes) @@ -612,20 +647,14 @@ func peerList(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe. cErr := salticidae.NewError() for _, ip := range ips { addr := salticidae.NewNetAddrFromIPPortString(ip.String(), true, &cErr) - if cErr.GetCode() == 0 && !HandshakeNet.myAddr.IsEq(addr) { // Make sure not to connect to myself - ip := toIPDesc(addr) - if !HandshakeNet.pending.ContainsIP(ip) && !HandshakeNet.connections.ContainsIP(ip) { - HandshakeNet.log.Debug("Adding peer %s", ip) - HandshakeNet.Connect(addr) - } + if cErr.GetCode() != 0 || HandshakeNet.myAddr.IsEq(addr) { + // Make sure not to connect to myself + continue } - } -} -func getMsgCert(_conn *C.struct_msgnetwork_conn_t) ids.ShortID { - conn := salticidae.MsgNetworkConnFromC(salticidae.CMsgNetworkConn(_conn)) - return getCert(conn.GetPeerCert()) + HandshakeNet.Connect(addr) + } } func getPeerCert(_conn *C.struct_peernetwork_conn_t) ids.ShortID { @@ -635,13 +664,12 @@ func getPeerCert(_conn *C.struct_peernetwork_conn_t) ids.ShortID { func getCert(cert salticidae.X509) ids.ShortID { der := cert.GetDer(false) - defer der.Free() - certDS := salticidae.NewDataStreamMovedFromByteArray(der, false) - defer certDS.Free() - certBytes := certDS.GetDataInPlace(certDS.Size()).Get() certID, err := ids.ToShortID(hashing.PubkeyBytesToAddress(certBytes)) + + certDS.Free() + der.Free() HandshakeNet.log.AssertNoError(err) return certID } @@ -651,13 +679,3 @@ func checkCompatibility(myVersion string, peerVersion string) bool { // At the moment, we are all compatible. return true } - -func toAddr(ip utils.IPDesc, autoFree bool) salticidae.NetAddr { - err := salticidae.NewError() - addr := salticidae.NewNetAddrFromIPPortString(ip.String(), autoFree, &err) - HandshakeNet.log.AssertTrue(err.GetCode() == 0, "IP Failed parsing") - return addr -} -func toShortID(ip utils.IPDesc) ids.ShortID { - return ids.NewShortID(hashing.ComputeHash160Array([]byte(ip.String()))) -} diff --git a/node/node.go b/node/node.go index 42d334d..2cc7eaa 100644 --- a/node/node.go +++ b/node/node.go @@ -5,7 +5,7 @@ package node // #include "salticidae/network.h" // void onTerm(int sig, void *); -// void errorHandler(SalticidaeCError *, bool, void *); +// void errorHandler(SalticidaeCError *, bool, int32_t, void *); import "C" import ( @@ -130,14 +130,14 @@ func onTerm(C.int, unsafe.Pointer) { } //export errorHandler -func errorHandler(_err *C.struct_SalticidaeCError, fatal C.bool, _ unsafe.Pointer) { +func errorHandler(_err *C.struct_SalticidaeCError, fatal C.bool, asyncID C.int32_t, _ unsafe.Pointer) { err := (*salticidae.Error)(unsafe.Pointer(_err)) if fatal { MainNode.Log.Fatal("Error during async call: %s", salticidae.StrError(err.GetCode())) MainNode.EC.Stop() return } - MainNode.Log.Error("Error during async call: %s", salticidae.StrError(err.GetCode())) + MainNode.Log.Error("Error during async with ID %d call: %s", asyncID, salticidae.StrError(err.GetCode())) } func (n *Node) initNetlib() error { diff --git a/scripts/ansible/inventory.yml b/scripts/ansible/inventory.yml index 5315082..1da8bad 100755 --- a/scripts/ansible/inventory.yml +++ b/scripts/ansible/inventory.yml @@ -1,15 +1,14 @@ borealis_bootstrap: hosts: bootstrap1: - ansible_host: 3.227.207.132 - http_tls_enabled: true - http_tls_key_file: "/home/ubuntu/ssl/privkey.pem" - http_tls_cert_file: "/home/ubuntu/ssl/fullchain.pem" + ansible_host: 3.84.129.247 + staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker1.key" + staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker1.crt" vars: ansible_connection: ssh ansible_user: ubuntu - network_id: "cascade" + network_id: "local" api_admin_enabled: true api_keystore_enabled: true api_metrics_enabled: true @@ -29,7 +28,7 @@ borealis_bootstrap: staking_tls_key_file: "/home/ubuntu/keys/staker.key" staking_tls_cert_file: "/home/ubuntu/keys/staker.crt" log_dir: "/home/ubuntu/.gecko" - log_level: debug + log_level: verbo snow_sample_size: 3 snow_quorum_size: 2 snow_virtuous_commit_threshold: 20 @@ -43,18 +42,26 @@ borealis_bootstrap: borealis_node: hosts: node1: - ansible_host: 34.207.133.167 + ansible_host: 35.153.99.244 + staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker2.key" + staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker2.crt" node2: - ansible_host: 107.23.241.199 + ansible_host: 34.201.137.119 + staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker3.key" + staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker3.crt" node3: - ansible_host: 54.197.215.186 + ansible_host: 54.146.1.110 + staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker4.key" + staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker4.crt" node4: - ansible_host: 18.234.153.22 + ansible_host: 54.91.255.231 + staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker5.key" + staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker5.crt" vars: ansible_connection: ssh ansible_user: ubuntu - network_id: "cascade" + network_id: "local" api_admin_enabled: true api_keystore_enabled: true api_metrics_enabled: true @@ -67,8 +74,8 @@ borealis_node: http_tls_enabled: false http_tls_key_file: "" http_tls_cert_file: "" - bootstrap_ips: "3.227.207.132:21001" - bootstrap_ids: "NX4zVkuiRJZYe6Nzzav7GXN3TakUet3Co" + bootstrap_ips: "3.84.129.247:21001" + bootstrap_ids: "7Xhw2mDxuDS44j42TCB6U5579esbSt3Lg" staking_port: 21001 staking_tls_enabled: true staking_tls_key_file: "/home/ubuntu/keys/staker.key" diff --git a/scripts/ansible/ping_playbook.yml b/scripts/ansible/ping_playbook.yml new file mode 100755 index 0000000..0c47625 --- /dev/null +++ b/scripts/ansible/ping_playbook.yml @@ -0,0 +1,11 @@ + +--- +- name: Update the network + connection: ssh + gather_facts: false + hosts: all + tasks: + - name: Ping node + shell: "ls" + environment: + PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin diff --git a/scripts/ansible/restart_playbook.yml b/scripts/ansible/restart_playbook.yml index 48d44fb..f888d5d 100755 --- a/scripts/ansible/restart_playbook.yml +++ b/scripts/ansible/restart_playbook.yml @@ -8,7 +8,7 @@ ava_binary: ~/go/src/github.com/ava-labs/gecko/build/ava repo_folder: ~/go/src/github.com/ava-labs/gecko repo_name: ava-labs/gecko - repo_branch: cascade + repo_branch: master tasks: - name: Kill Node command: killall ava diff --git a/scripts/ansible/update_playbook.yml b/scripts/ansible/update_playbook.yml index b28def3..9147886 100755 --- a/scripts/ansible/update_playbook.yml +++ b/scripts/ansible/update_playbook.yml @@ -8,7 +8,7 @@ ava_binary: ~/go/src/github.com/ava-labs/gecko/build/ava repo_folder: ~/go/src/github.com/ava-labs/gecko repo_name: ava-labs/gecko - repo_branch: cascade + repo_branch: master tasks: - name: Kill Node command: killall ava @@ -25,6 +25,6 @@ environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin - name: Start node - shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >/dev/null 2>&1 &" + shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >~/all-logs.txt 2>&1 &" environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin diff --git a/snow/engine/avalanche/bootstrapper.go b/snow/engine/avalanche/bootstrapper.go index 0f58194..7bacd46 100644 --- a/snow/engine/avalanche/bootstrapper.go +++ b/snow/engine/avalanche/bootstrapper.go @@ -207,6 +207,7 @@ func (b *bootstrapper) finish() { func (b *bootstrapper) executeAll(jobs *queue.Jobs, numBlocked prometheus.Gauge) { for job, err := jobs.Pop(); err == nil; job, err = jobs.Pop() { numBlocked.Dec() + b.BootstrapConfig.Context.Log.Debug("Executing: %s", job.ID()) if err := jobs.Execute(job); err != nil { b.BootstrapConfig.Context.Log.Warn("Error executing: %s", err) } diff --git a/snow/engine/avalanche/voter.go b/snow/engine/avalanche/voter.go index 7430495..90b7b2f 100644 --- a/snow/engine/avalanche/voter.go +++ b/snow/engine/avalanche/voter.go @@ -79,12 +79,23 @@ func (v *voter) bubbleVotes(votes ids.UniqueBag) ids.UniqueBag { vtx := vts[0] vts = vts[1:] - if status := vtx.Status(); status.Fetched() && !v.t.Consensus.VertexIssued(vtx) { - vts = append(vts, vtx.Parents()...) - } else if !status.Decided() && v.t.Consensus.VertexIssued(vtx) { + status := vtx.Status() + if !status.Fetched() { + v.t.Config.Context.Log.Debug("Dropping %d vote(s) for %s because the vertex is unknown", set.Len(), vtx.ID()) + continue + } + + if status.Decided() { + v.t.Config.Context.Log.Debug("Dropping %d vote(s) for %s because the vertex is accepted", set.Len(), vtx.ID()) + continue + } + + if v.t.Consensus.VertexIssued(vtx) { + v.t.Config.Context.Log.Debug("Applying %d vote(s) for %s", set.Len(), vtx.ID()) bubbledVotes.UnionSet(vtx.ID(), set) } else { - v.t.Config.Context.Log.Debug("Dropping %d vote(s) for %s because the vertex is invalid", set.Len(), vtx.ID()) + v.t.Config.Context.Log.Debug("Bubbling %d vote(s) for %s because the vertex isn't issued", set.Len(), vtx.ID()) + vts = append(vts, vtx.Parents()...) } } } From 3c9187fc7b4c2353bdc334bfaab0f83460ccf7df Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 20 Apr 2020 20:25:22 -0400 Subject: [PATCH 04/12] wip --- networking/handshake_handlers.go | 59 +++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index 94ac481..4ba44cc 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -187,18 +187,26 @@ func (nm *Handshake) Initialize( net.RegHandler(PeerList, salticidae.MsgNetworkMsgCallback(C.peerList), nil) } -// ConnectTo add the peer as a connection and connects to them. Will free peer -func (nm *Handshake) ConnectTo(peer salticidae.PeerID, addr salticidae.NetAddr) { - if !nm.pending.ContainsPeerID(peer) && !nm.connections.ContainsPeerID(peer) { - HandshakeNet.net.AddPeer(peer) - HandshakeNet.net.SetPeerAddr(peer, addr) - HandshakeNet.net.ConnPeer(peer, 600, 1) - - // TODO: Should add the peer to the pending set, register a timeout to - // remove the peer +// ConnectTo add the peer as a connection and connects to them. +func (nm *Handshake) ConnectTo(peer salticidae.PeerID, stakerID ids.ShortID, addr salticidae.NetAddr) { + if nm.pending.ContainsPeerID(peer) || nm.connections.ContainsPeerID(peer) { + return } - peer.Free() + nm.net.AddPeer(peer) + nm.net.SetPeerAddr(peer, addr) + nm.net.ConnPeer(peer, 600, 1) + + ip := toIPDesc(addr) + nm.pending.Add(peer, stakerID, ip) + + peerBytes := toID(peer) + peerID := ids.NewID(peerBytes) + + nm.reconnectTimeout.Put(peerID, func() { + nm.pending.Remove(peer, stakerID) + nm.net.DelPeer(peer) + }) } // Connect ... @@ -211,19 +219,37 @@ func (nm *Handshake) Connect(addr salticidae.NetAddr) { nm.log.Debug("Adding peer %s", ip) - //TODO: - if !nm.enableStaking { - peer := salticidae.NewPeerIDFromNetAddr(addr, false) + peer := salticidae.NewPeerIDFromNetAddr(addr, true) nm.ConnectTo(peer, addr) - } else { - nm.requestedConnections[ipStr] = struct{}{} + return + } + + count := new(int) + *count = 600 + handler := new(func()) + *handler = func() { + nm.requestedLock.Lock() + defer nm.requestedLock.Unlock() + + if *count <= 0 { + delete(nm.requested, ipStr) + return + } + *count-- + + if nm.pending.ContainsIP(ip) || nm.connections.ContainsIP(ip) { + return + } nm.log.Verbo("Attempting to discover peer at %s", ipStr) + nm.requested[ipStr] = struct{}{} + msgNet := nm.net.AsMsgNetwork() msgNet.Connect(addr) } + (*handler)() } // AwaitConnections ... @@ -383,7 +409,8 @@ func connHandler(_conn *C.struct_msgnetwork_conn_t, connected C.bool, _ unsafe.P delete(HandshakeNet.requestedConnections, ipStr) cert := conn.GetPeerCert() - peer := salticidae.NewPeerIDFromX509(cert, false) + peer := salticidae.NewPeerIDFromX509(cert, true) + HandshakeNet.ConnectTo(peer, addr) return true } From 2c76dd19544670504b56e05b06ea0288ce6bf56d Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 20 Apr 2020 22:33:33 -0400 Subject: [PATCH 05/12] wip --- networking/handshake_handlers.go | 90 ++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index 4ba44cc..daa0aff 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -114,16 +114,16 @@ type Handshake struct { // Connections that I have added by IP, but haven't gotten an ID from requestedLock sync.Mutex requested map[string]struct{} - requestedTimeout timer.TimeoutManager + requestedTimeout timer.TimeoutManager // keys are hashes of the ip:port string // Connections that I have added as a peer, but haven't gotten a version // message from pending Connections - versionTimeout timer.TimeoutManager + versionTimeout timer.TimeoutManager // keys are the peer IDs // Connections that I have gotten a valid version message from connections Connections - reconnectTimeout timer.TimeoutManager + reconnectTimeout timer.TimeoutManager // keys are the peer IDs // IPs of nodes I'm connected to will be repeatedly gossiped throughout the network peerListGossiper *timer.Repeater @@ -193,6 +193,8 @@ func (nm *Handshake) ConnectTo(peer salticidae.PeerID, stakerID ids.ShortID, add return } + nm.log.Info("Attempting to connect to %s", stakerID) + nm.net.AddPeer(peer) nm.net.SetPeerAddr(peer, addr) nm.net.ConnPeer(peer, 600, 1) @@ -205,7 +207,10 @@ func (nm *Handshake) ConnectTo(peer salticidae.PeerID, stakerID ids.ShortID, add nm.reconnectTimeout.Put(peerID, func() { nm.pending.Remove(peer, stakerID) + nm.connections.Remove(peer, stakerID) nm.net.DelPeer(peer) + + nm.numPeers.Set(float64(nm.connections.Len())) }) } @@ -217,11 +222,11 @@ func (nm *Handshake) Connect(addr salticidae.NetAddr) { return } - nm.log.Debug("Adding peer %s", ip) + nm.log.Info("Adding peer %s", ip) if !nm.enableStaking { peer := salticidae.NewPeerIDFromNetAddr(addr, true) - nm.ConnectTo(peer, addr) + nm.ConnectTo(peer, toShortID(ip), addr) return } @@ -232,6 +237,14 @@ func (nm *Handshake) Connect(addr salticidae.NetAddr) { nm.requestedLock.Lock() defer nm.requestedLock.Unlock() + if *count == 600 { + nm.requested[ipStr] = struct{}{} + } + + if _, exists := nm.requested[ipStr]; !exists { + return + } + if *count <= 0 { delete(nm.requested, ipStr) return @@ -242,12 +255,13 @@ func (nm *Handshake) Connect(addr salticidae.NetAddr) { return } - nm.log.Verbo("Attempting to discover peer at %s", ipStr) - - nm.requested[ipStr] = struct{}{} + nm.log.Info("Attempting to discover peer at %s", ipStr) msgNet := nm.net.AsMsgNetwork() msgNet.Connect(addr) + + ipID := ids.NewID(hashing.ComputeHash256Array([]byte(ipStr))) + nm.requestedTimeout.Put(ipID, *handler) } (*handler)() } @@ -397,21 +411,27 @@ func connHandler(_conn *C.struct_msgnetwork_conn_t, connected C.bool, _ unsafe.P return connected } + HandshakeNet.requestedLock.Lock() + defer HandshakeNet.requestedLock.Unlock() + conn := salticidae.MsgNetworkConnFromC(salticidae.CMsgNetworkConn(_conn)) addr := conn.GetAddr() ip := toIPDesc(addr) - ipStr := ip.String() - if _, exists := HandshakeNet.requestedConnections[ipStr]; !exists { + + ipID := ids.NewID(hashing.ComputeHash256Array([]byte(ipStr))) + HandshakeNet.requestedTimeout.Remove(ipID) + + if _, exists := HandshakeNet.requested[ipStr]; !exists { HandshakeNet.log.Debug("connHandler called with %s", ip) return true } - delete(HandshakeNet.requestedConnections, ipStr) + delete(HandshakeNet.requested, ipStr) cert := conn.GetPeerCert() peer := salticidae.NewPeerIDFromX509(cert, true) - HandshakeNet.ConnectTo(peer, addr) + HandshakeNet.ConnectTo(peer, getCert(cert), addr) return true } @@ -434,8 +454,6 @@ func (nm *Handshake) connectedToPeer(conn *C.struct_peernetwork_conn_t, peer sal nm.reconnectTimeout.Remove(peerID) - nm.pending.Add(peer, cert, utils.IPDesc{}) - handler := new(func()) *handler = func() { if nm.pending.ContainsPeerID(peer) { @@ -461,23 +479,28 @@ func (nm *Handshake) disconnectedFromPeer(peer salticidae.PeerID) { peerBytes := toID(peer) peerID := ids.NewID(peerBytes) + nm.versionTimeout.Remove(peerID) + nm.connections.Remove(peer, cert) + nm.numPeers.Set(float64(nm.connections.Len())) + if nm.vdrs.Contains(cert) { nm.reconnectTimeout.Put(peerID, func() { + nm.pending.Remove(peer, cert) + nm.connections.Remove(peer, cert) nm.net.DelPeer(peer) + + nm.numPeers.Set(float64(nm.connections.Len())) }) + nm.pending.Add(peer, cert, utils.IPDesc{}) } else { + nm.pending.Remove(peer, cert) nm.net.DelPeer(peer) } - nm.versionTimeout.Remove(peerID) if !nm.enableStaking { nm.vdrs.Remove(cert) } - nm.pending.RemovePeerID(peer) - nm.connections.RemovePeerID(peer) - nm.numPeers.Set(float64(nm.connections.Len())) - nm.awaitingLock.Lock() defer nm.awaitingLock.Unlock() for _, awaiting := range HandshakeNet.awaiting { @@ -513,19 +536,27 @@ func unknownPeerHandler(_addr *C.netaddr_t, _cert *C.x509_t, _ unsafe.Pointer) { HandshakeNet.log.Info("Adding peer %s", ip) var peer salticidae.PeerID + var id ids.ShortID if HandshakeNet.enableStaking { cert := salticidae.X509FromC(salticidae.CX509(_cert)) peer = salticidae.NewPeerIDFromX509(cert, true) + id = getCert(cert) } else { peer = salticidae.NewPeerIDFromNetAddr(addr, true) + id = toShortID(ip) } peerBytes := toID(peer) peerID := ids.NewID(peerBytes) HandshakeNet.reconnectTimeout.Put(peerID, func() { + HandshakeNet.pending.Remove(peer, id) + HandshakeNet.connections.Remove(peer, id) HandshakeNet.net.DelPeer(peer) + + HandshakeNet.numPeers.Set(float64(HandshakeNet.connections.Len())) }) + HandshakeNet.pending.Add(peer, id, utils.IPDesc{}) HandshakeNet.net.AddPeer(peer) } @@ -568,12 +599,17 @@ func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.P conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn)) peer := conn.GetPeerID(true) + peerBytes := toID(peer) + peerID := ids.NewID(peerBytes) + + HandshakeNet.versionTimeout.Remove(peerID) + id, exists := HandshakeNet.pending.GetID(peer) if !exists { + HandshakeNet.log.Warn("Dropping Version message because the peer isn't pending") return } - - defer HandshakeNet.pending.Remove(peer, id) + HandshakeNet.pending.Remove(peer, id) build := Builder{} pMsg, err := build.Parse(Version, msg.GetPayloadByMove()) @@ -612,18 +648,12 @@ func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.P HandshakeNet.SendPeerList(peer) HandshakeNet.connections.Add(peer, id, ip) - - peerBytes := toID(peer) - peerID := ids.NewID(peerBytes) - - HandshakeNet.versionTimeout.Remove(peerID) + HandshakeNet.numPeers.Set(float64(HandshakeNet.connections.Len())) if !HandshakeNet.enableStaking { HandshakeNet.vdrs.Add(validators.NewValidator(id, 1)) } - HandshakeNet.numPeers.Set(float64(HandshakeNet.connections.Len())) - HandshakeNet.awaitingLock.Lock() defer HandshakeNet.awaitingLock.Unlock() @@ -706,3 +736,7 @@ func checkCompatibility(myVersion string, peerVersion string) bool { // At the moment, we are all compatible. return true } + +func toShortID(ip utils.IPDesc) ids.ShortID { + return ids.NewShortID(hashing.ComputeHash160Array([]byte(ip.String()))) +} From 386ef6b0008cc37630a496ac2e27d8f24882db78 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Tue, 21 Apr 2020 01:31:10 -0400 Subject: [PATCH 06/12] cleaned up logging in avalanche bubbling --- snow/engine/avalanche/voter.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snow/engine/avalanche/voter.go b/snow/engine/avalanche/voter.go index 90b7b2f..6e52a34 100644 --- a/snow/engine/avalanche/voter.go +++ b/snow/engine/avalanche/voter.go @@ -81,20 +81,20 @@ func (v *voter) bubbleVotes(votes ids.UniqueBag) ids.UniqueBag { status := vtx.Status() if !status.Fetched() { - v.t.Config.Context.Log.Debug("Dropping %d vote(s) for %s because the vertex is unknown", set.Len(), vtx.ID()) + v.t.Config.Context.Log.Verbo("Dropping %d vote(s) for %s because the vertex is unknown", set.Len(), vtx.ID()) continue } if status.Decided() { - v.t.Config.Context.Log.Debug("Dropping %d vote(s) for %s because the vertex is accepted", set.Len(), vtx.ID()) + v.t.Config.Context.Log.Verbo("Dropping %d vote(s) for %s because the vertex is decided", set.Len(), vtx.ID()) continue } if v.t.Consensus.VertexIssued(vtx) { - v.t.Config.Context.Log.Debug("Applying %d vote(s) for %s", set.Len(), vtx.ID()) + v.t.Config.Context.Log.Verbo("Applying %d vote(s) for %s", set.Len(), vtx.ID()) bubbledVotes.UnionSet(vtx.ID(), set) } else { - v.t.Config.Context.Log.Debug("Bubbling %d vote(s) for %s because the vertex isn't issued", set.Len(), vtx.ID()) + v.t.Config.Context.Log.Verbo("Bubbling %d vote(s) for %s because the vertex isn't issued", set.Len(), vtx.ID()) vts = append(vts, vtx.Parents()...) } } From 14f178612eff57589577957a15c763d42d7bf8ce Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Tue, 21 Apr 2020 01:57:10 -0400 Subject: [PATCH 07/12] added plugin dir flag --- scripts/ansible/inventory.yml | 2 ++ scripts/ansible/restart_playbook.yml | 6 +++--- scripts/ansible/update_playbook.yml | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/ansible/inventory.yml b/scripts/ansible/inventory.yml index 1da8bad..0964550 100755 --- a/scripts/ansible/inventory.yml +++ b/scripts/ansible/inventory.yml @@ -27,6 +27,7 @@ borealis_bootstrap: staking_tls_enabled: true staking_tls_key_file: "/home/ubuntu/keys/staker.key" staking_tls_cert_file: "/home/ubuntu/keys/staker.crt" + plugin_dir: "/home/ubuntu/go/src/github.com/ava-labs/gecko/build/plugins" log_dir: "/home/ubuntu/.gecko" log_level: verbo snow_sample_size: 3 @@ -80,6 +81,7 @@ borealis_node: staking_tls_enabled: true staking_tls_key_file: "/home/ubuntu/keys/staker.key" staking_tls_cert_file: "/home/ubuntu/keys/staker.crt" + plugin_dir: "/home/ubuntu/go/src/github.com/ava-labs/gecko/build/plugins" log_dir: "/home/ubuntu/.gecko" log_level: debug snow_sample_size: 3 diff --git a/scripts/ansible/restart_playbook.yml b/scripts/ansible/restart_playbook.yml index f888d5d..deec014 100755 --- a/scripts/ansible/restart_playbook.yml +++ b/scripts/ansible/restart_playbook.yml @@ -7,8 +7,8 @@ vars: ava_binary: ~/go/src/github.com/ava-labs/gecko/build/ava repo_folder: ~/go/src/github.com/ava-labs/gecko - repo_name: ava-labs/gecko - repo_branch: master + repo_name: ava-labs/gecko-internal + repo_branch: retry-connections tasks: - name: Kill Node command: killall ava @@ -33,6 +33,6 @@ path: "{{ log_dir }}" state: absent - name: Start node - shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >/dev/null 2>&1 &" + shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --plugin-dir={{ plugin_dir }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >/dev/null 2>&1 &" environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin diff --git a/scripts/ansible/update_playbook.yml b/scripts/ansible/update_playbook.yml index 9147886..478afec 100755 --- a/scripts/ansible/update_playbook.yml +++ b/scripts/ansible/update_playbook.yml @@ -7,8 +7,8 @@ vars: ava_binary: ~/go/src/github.com/ava-labs/gecko/build/ava repo_folder: ~/go/src/github.com/ava-labs/gecko - repo_name: ava-labs/gecko - repo_branch: master + repo_name: ava-labs/gecko-internal + repo_branch: retry-connections tasks: - name: Kill Node command: killall ava @@ -25,6 +25,6 @@ environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin - name: Start node - shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >~/all-logs.txt 2>&1 &" + shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --plugin-dir={{ plugin_dir }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >~thelogs.txt 2>&1 &" environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin From a3e3e5f21eefcee08223df14a309b8d5b92127f7 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Tue, 21 Apr 2020 13:55:01 -0400 Subject: [PATCH 08/12] changed ansible back to the testnet --- scripts/ansible/inventory.yml | 33 ++++++++++++----------------- scripts/ansible/update_playbook.yml | 2 +- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/scripts/ansible/inventory.yml b/scripts/ansible/inventory.yml index 0964550..a115a90 100755 --- a/scripts/ansible/inventory.yml +++ b/scripts/ansible/inventory.yml @@ -1,14 +1,15 @@ borealis_bootstrap: hosts: bootstrap1: - ansible_host: 3.84.129.247 - staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker1.key" - staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker1.crt" + ansible_host: 3.227.207.132 + http_tls_enabled: true + http_tls_key_file: "/home/ubuntu/ssl/privkey.pem" + http_tls_cert_file: "/home/ubuntu/ssl/fullchain.pem" vars: ansible_connection: ssh ansible_user: ubuntu - network_id: "local" + network_id: "cascade" api_admin_enabled: true api_keystore_enabled: true api_metrics_enabled: true @@ -29,7 +30,7 @@ borealis_bootstrap: staking_tls_cert_file: "/home/ubuntu/keys/staker.crt" plugin_dir: "/home/ubuntu/go/src/github.com/ava-labs/gecko/build/plugins" log_dir: "/home/ubuntu/.gecko" - log_level: verbo + log_level: debug snow_sample_size: 3 snow_quorum_size: 2 snow_virtuous_commit_threshold: 20 @@ -43,26 +44,18 @@ borealis_bootstrap: borealis_node: hosts: node1: - ansible_host: 35.153.99.244 - staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker2.key" - staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker2.crt" + ansible_host: 34.207.133.167 node2: - ansible_host: 34.201.137.119 - staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker3.key" - staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker3.crt" + ansible_host: 107.23.241.199 node3: - ansible_host: 54.146.1.110 - staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker4.key" - staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker4.crt" + ansible_host: 54.197.215.186 node4: - ansible_host: 54.91.255.231 - staking_tls_key_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker5.key" - staking_tls_cert_file: "/home/ubuntu/go/src/github.com/ava-labs/gecko/keys/local/staker5.crt" + ansible_host: 18.234.153.22 vars: ansible_connection: ssh ansible_user: ubuntu - network_id: "local" + network_id: "cascade" api_admin_enabled: true api_keystore_enabled: true api_metrics_enabled: true @@ -75,8 +68,8 @@ borealis_node: http_tls_enabled: false http_tls_key_file: "" http_tls_cert_file: "" - bootstrap_ips: "3.84.129.247:21001" - bootstrap_ids: "7Xhw2mDxuDS44j42TCB6U5579esbSt3Lg" + bootstrap_ips: "3.227.207.132:21001" + bootstrap_ids: "NX4zVkuiRJZYe6Nzzav7GXN3TakUet3Co" staking_port: 21001 staking_tls_enabled: true staking_tls_key_file: "/home/ubuntu/keys/staker.key" diff --git a/scripts/ansible/update_playbook.yml b/scripts/ansible/update_playbook.yml index 478afec..e66dc17 100755 --- a/scripts/ansible/update_playbook.yml +++ b/scripts/ansible/update_playbook.yml @@ -25,6 +25,6 @@ environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin - name: Start node - shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --plugin-dir={{ plugin_dir }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >~thelogs.txt 2>&1 &" + shell: "nohup {{ ava_binary }} --network-id={{ network_id }} --api-admin-enabled={{ api_admin_enabled }} --api-keystore-enabled={{ api_keystore_enabled }} --api-metrics-enabled={{ api_metrics_enabled }} --ava-tx-fee={{ ava_tx_fee }} --assertions-enabled={{ assertions_enabled }} --signature-verification-enabled={{ signature_verification_enabled }} --db-enabled={{ db_enabled }} --db-dir={{ db_dir }} --http-port={{ http_port }} --http-tls-enabled={{ http_tls_enabled }} --http-tls-key-file={{ http_tls_key_file }} --http-tls-cert-file={{ http_tls_cert_file }} --bootstrap-ips={{ bootstrap_ips }} --bootstrap-ids={{ bootstrap_ids }} --public-ip={{ ansible_host }} --staking-port={{ staking_port }} --staking-tls-enabled={{ staking_tls_enabled }} --staking-tls-key-file={{ staking_tls_key_file }} --staking-tls-cert-file={{ staking_tls_cert_file }} --plugin-dir={{ plugin_dir }} --log-dir={{ log_dir }} --log-level={{ log_level }} --snow-sample-size={{ snow_sample_size }} --snow-quorum-size={{ snow_quorum_size }} --snow-virtuous-commit-threshold={{ snow_virtuous_commit_threshold }} --snow-rogue-commit-threshold={{ snow_rogue_commit_threshold }} --snow-avalanche-num-parents={{ snow_avalanche_num_parents }} --snow-avalanche-batch-size={{ snow_avalanche_batch_size }} --api-ipcs-enabled={{ api_ipcs_enabled }} --xput-server-enabled={{ xput_server_enabled }} --xput-server-port={{ xput_server_port }} >/dev/null 2>&1 &" environment: PATH: /sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/snap/bin From f592ecfb7bd2915dab01e70b283d00205b68d2c0 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Tue, 21 Apr 2020 14:32:00 -0400 Subject: [PATCH 09/12] Don't refresh peers from peerlists if already attempting to connect --- networking/handshake_handlers.go | 20 +++++++++++++++----- node/node.go | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index daa0aff..18476f9 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -77,7 +77,7 @@ const ( // ConnectTimeout is the amount of time to wait before attempt to connect to // an unknown peer - ConnectTimeout = time.Second + ConnectTimeout = 6 * time.Second // GetVersionTimeout is the amount of time to wait before sending a // getVersion message to a partially connected peer GetVersionTimeout = 2 * time.Second @@ -222,22 +222,32 @@ func (nm *Handshake) Connect(addr salticidae.NetAddr) { return } - nm.log.Info("Adding peer %s", ip) - if !nm.enableStaking { + nm.log.Info("Adding peer %s", ip) + peer := salticidae.NewPeerIDFromNetAddr(addr, true) nm.ConnectTo(peer, toShortID(ip), addr) return } + nm.requestedLock.Lock() + _, exists := nm.requested[ipStr] + nm.requestedLock.Unlock() + + if exists { + return + } + + nm.log.Info("Adding peer %s", ip) + count := new(int) - *count = 600 + *count = 100 handler := new(func()) *handler = func() { nm.requestedLock.Lock() defer nm.requestedLock.Unlock() - if *count == 600 { + if *count == 100 { nm.requested[ipStr] = struct{}{} } diff --git a/node/node.go b/node/node.go index 2cc7eaa..0f3d105 100644 --- a/node/node.go +++ b/node/node.go @@ -137,7 +137,7 @@ func errorHandler(_err *C.struct_SalticidaeCError, fatal C.bool, asyncID C.int32 MainNode.EC.Stop() return } - MainNode.Log.Error("Error during async with ID %d call: %s", asyncID, salticidae.StrError(err.GetCode())) + MainNode.Log.Debug("Error during async with ID %d call: %s", asyncID, salticidae.StrError(err.GetCode())) } func (n *Node) initNetlib() error { From f1cfa2aa511867175bd938cd84701279cc0740c7 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Wed, 22 Apr 2020 12:46:54 -0400 Subject: [PATCH 10/12] Added server ip + port to error logging --- node/node.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/node.go b/node/node.go index 0f3d105..d4b2c0e 100644 --- a/node/node.go +++ b/node/node.go @@ -254,7 +254,7 @@ func (n *Node) StartConsensusServer() error { // Listen for P2P messages n.PeerNet.Listen(serverIP, &err) if code := err.GetCode(); code != 0 { - return fmt.Errorf("failed to start consensus server: %s", salticidae.StrError(code)) + return fmt.Errorf("failed to listen on consensus server at %s: %s", n.Config.StakingIP, salticidae.StrError(code)) } // Start a server to handle throughput tests if configuration says to. Disabled by default. @@ -268,7 +268,7 @@ func (n *Node) StartConsensusServer() error { n.ClientNet.Listen(clientIP, &err) if code := err.GetCode(); code != 0 { - return fmt.Errorf("failed to listen on xput server: %s", salticidae.StrError(code)) + return fmt.Errorf("failed to listen on xput server at 127.0.0.1:%d: %s", n.Config.ThroughputPort, salticidae.StrError(code)) } } From a4a171c2190118ea40211f48f329361cbb7cca2e Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Wed, 22 Apr 2020 15:15:48 -0400 Subject: [PATCH 11/12] Changed connTimeout to 60s rather than defaulting to 3m --- node/node.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/node.go b/node/node.go index d4b2c0e..3aafae6 100644 --- a/node/node.go +++ b/node/node.go @@ -152,6 +152,8 @@ func (n *Node) initNetlib() error { // Create peer network config, may have tls enabled peerConfig := salticidae.NewPeerNetworkConfig() + peerConfig.ConnTimeout(60) + msgConfig := peerConfig.AsMsgNetworkConfig() msgConfig.MaxMsgSize(maxMessageSize) From 436972b928fd2fd91a184dd3995ec1f1a41d0946 Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Wed, 22 Apr 2020 19:52:37 -0400 Subject: [PATCH 12/12] Added kill script --- scripts/ansible/kill_playbook.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 scripts/ansible/kill_playbook.yml diff --git a/scripts/ansible/kill_playbook.yml b/scripts/ansible/kill_playbook.yml new file mode 100755 index 0000000..6e91645 --- /dev/null +++ b/scripts/ansible/kill_playbook.yml @@ -0,0 +1,9 @@ + +--- +- name: Update the network + connection: ssh + gather_facts: false + hosts: all + tasks: + - name: Kill Node + command: killall ava \ No newline at end of file