Merge pull request #1352 from LukasKorba/1351-Recover-from-download-issues
[#1351] Recover from block stream issues
This commit is contained in:
commit
06d2b6986f
|
@ -11,6 +11,9 @@ and this library adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
### [#1346] Troubleshooting synchronization
|
### [#1346] Troubleshooting synchronization
|
||||||
We focused on performance of the synchronization and found out a root cause in progress reporting. A simple change reduced the synchronization time significantly by reporting less frequently. This affects the UX a bit because the % of the sync is updated only every 500 scanned blocks instead of every 100. Proper solution is going to be handled in #1353.
|
We focused on performance of the synchronization and found out a root cause in progress reporting. A simple change reduced the synchronization time significantly by reporting less frequently. This affects the UX a bit because the % of the sync is updated only every 500 scanned blocks instead of every 100. Proper solution is going to be handled in #1353.
|
||||||
|
|
||||||
|
### [#1351] Recover from block stream issues
|
||||||
|
Async block stream grpc calls sometimes fail with unknown error 14, most of the time represented as `Transport became inactive` or `NIOHTTP2.StreamClosed`. Unless the service is truly down, these errors are usually false positives. The SDK was able to recover from this error with the next sync triggered but it takes 10-30s to happen. This delay is unnecessary so we made 2 changes. When these errors are caught the next sync is triggered immediately (at most 3 times) + the error state is not passed to the clients.
|
||||||
|
|
||||||
# 2.0.5 - 2023-12-15
|
# 2.0.5 - 2023-12-15
|
||||||
|
|
||||||
## Added
|
## Added
|
||||||
|
|
|
@ -40,6 +40,7 @@ actor CompactBlockProcessor {
|
||||||
private let fileManager: ZcashFileManager
|
private let fileManager: ZcashFileManager
|
||||||
|
|
||||||
private var retryAttempts: Int = 0
|
private var retryAttempts: Int = 0
|
||||||
|
private var blockStreamRetryAttempts: Int = 0
|
||||||
private var backoffTimer: Timer?
|
private var backoffTimer: Timer?
|
||||||
private var consecutiveChainValidationErrors: Int = 0
|
private var consecutiveChainValidationErrors: Int = 0
|
||||||
|
|
||||||
|
@ -263,6 +264,7 @@ extension CompactBlockProcessor {
|
||||||
func start(retry: Bool = false) async {
|
func start(retry: Bool = false) async {
|
||||||
if retry {
|
if retry {
|
||||||
self.retryAttempts = 0
|
self.retryAttempts = 0
|
||||||
|
self.blockStreamRetryAttempts = 0
|
||||||
self.backoffTimer?.invalidate()
|
self.backoffTimer?.invalidate()
|
||||||
self.backoffTimer = nil
|
self.backoffTimer = nil
|
||||||
}
|
}
|
||||||
|
@ -289,6 +291,7 @@ extension CompactBlockProcessor {
|
||||||
self.backoffTimer = nil
|
self.backoffTimer = nil
|
||||||
await stopAllActions()
|
await stopAllActions()
|
||||||
retryAttempts = 0
|
retryAttempts = 0
|
||||||
|
blockStreamRetryAttempts = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func latestHeight() async throws -> BlockHeight {
|
func latestHeight() async throws -> BlockHeight {
|
||||||
|
@ -530,7 +533,17 @@ extension CompactBlockProcessor {
|
||||||
await stopAllActions()
|
await stopAllActions()
|
||||||
logger.error("Sync failed with error: \(error)")
|
logger.error("Sync failed with error: \(error)")
|
||||||
|
|
||||||
if Task.isCancelled {
|
// catching the block stream error
|
||||||
|
if case ZcashError.serviceBlockStreamFailed = error, self.blockStreamRetryAttempts < ZcashSDK.blockStreamRetries {
|
||||||
|
// This may be a false positive communication error that is usually resolved by retry.
|
||||||
|
// We will try to reset the sync and continue but this will be done at most `ZcashSDK.blockStreamRetries` times.
|
||||||
|
logger.error("ZcashError.serviceBlockStreamFailed, retry is available, starting the sync all over again.")
|
||||||
|
|
||||||
|
self.blockStreamRetryAttempts += 1
|
||||||
|
|
||||||
|
// Start sync all over again
|
||||||
|
await resetContext()
|
||||||
|
} else if Task.isCancelled {
|
||||||
logger.info("Processing cancelled.")
|
logger.info("Processing cancelled.")
|
||||||
do {
|
do {
|
||||||
if try await syncTaskWasCancelled() {
|
if try await syncTaskWasCancelled() {
|
||||||
|
@ -545,13 +558,8 @@ extension CompactBlockProcessor {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if await handleSyncFailure(action: action, error: error) {
|
await handleSyncFailure(action: action, error: error)
|
||||||
// Start sync all over again
|
break
|
||||||
await resetContext()
|
|
||||||
} else {
|
|
||||||
// end the sync loop
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -567,15 +575,13 @@ extension CompactBlockProcessor {
|
||||||
return try await handleAfterSyncHooks()
|
return try await handleAfterSyncHooks()
|
||||||
}
|
}
|
||||||
|
|
||||||
private func handleSyncFailure(action: Action, error: Error) async -> Bool {
|
private func handleSyncFailure(action: Action, error: Error) async {
|
||||||
if action.removeBlocksCacheWhenFailed {
|
if action.removeBlocksCacheWhenFailed {
|
||||||
await ifTaskIsNotCanceledClearCompactBlockCache()
|
await ifTaskIsNotCanceledClearCompactBlockCache()
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.error("Sync failed with error: \(error)")
|
logger.error("Sync failed with error: \(error)")
|
||||||
await failure(error)
|
await failure(error)
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// swiftlint:disable:next cyclomatic_complexity
|
// swiftlint:disable:next cyclomatic_complexity
|
||||||
|
@ -642,6 +648,7 @@ extension CompactBlockProcessor {
|
||||||
latestBlockHeightWhenSyncing > 0 && latestBlockHeightWhenSyncing < latestBlockHeight
|
latestBlockHeightWhenSyncing > 0 && latestBlockHeightWhenSyncing < latestBlockHeight
|
||||||
|
|
||||||
retryAttempts = 0
|
retryAttempts = 0
|
||||||
|
blockStreamRetryAttempts = 0
|
||||||
consecutiveChainValidationErrors = 0
|
consecutiveChainValidationErrors = 0
|
||||||
|
|
||||||
let lastScannedHeight = await latestBlocksDataProvider.maxScannedHeight
|
let lastScannedHeight = await latestBlocksDataProvider.maxScannedHeight
|
||||||
|
|
|
@ -105,6 +105,11 @@ public enum ZcashSDK {
|
||||||
// TODO: [#1304] smart retry logic, https://github.com/zcash/ZcashLightClientKit/issues/1304
|
// TODO: [#1304] smart retry logic, https://github.com/zcash/ZcashLightClientKit/issues/1304
|
||||||
public static let defaultRetries = Int.max
|
public static let defaultRetries = Int.max
|
||||||
|
|
||||||
|
/// The communication errors are represented as serviceBlockStreamFailed : LightWalletServiceError, unavailable 14
|
||||||
|
/// These cases are usually false positives and another try will continue the work; in case the service is truly down, we
|
||||||
|
/// cap the amount of retries by this value.
|
||||||
|
public static let blockStreamRetries = 3
|
||||||
|
|
||||||
/// The default maximum amount of time to wait during retry backoff intervals. Failed loops will never wait longer than
|
/// The default maximum amount of time to wait during retry backoff intervals. Failed loops will never wait longer than
|
||||||
/// this before retrying.
|
/// this before retrying.
|
||||||
public static let defaultMaxBackOffInterval: TimeInterval = 600
|
public static let defaultMaxBackOffInterval: TimeInterval = 600
|
||||||
|
|
Loading…
Reference in New Issue