Skip to content

Commit 008e488

Browse files
authored
portal-bridge: add content gossip retries based on accept metadata (#3762)
1 parent 1632516 commit 008e488

File tree

1 file changed

+56
-12
lines changed

1 file changed

+56
-12
lines changed

portal/bridge/history/portal_history_bridge.nim

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -221,18 +221,62 @@ proc runHistory*(config: PortalBridgeConf) =
221221
contentKeyHex = contentKey.toHex()
222222
contentValueHex = contentValue.toHex()
223223

224-
try:
225-
let putContentResult = await bridge.portalClient.portal_historyPutContent(
226-
contentKeyHex, contentValueHex
227-
)
228-
debug "Content gossiped",
229-
peers = putContentResult.peerCount, contentKey = contentKeyHex
230-
except CancelledError as e:
231-
trace "Cancelled gossipWorker"
232-
raise e
233-
except CatchableError as e:
234-
error "JSON-RPC portal_historyPutContent failed",
235-
error = $e.msg, contentKey = contentKeyHex
224+
while true:
225+
try:
226+
let putContentResult = await bridge.portalClient.portal_historyPutContent(
227+
contentKeyHex, contentValueHex
228+
)
229+
let
230+
peers = putContentResult.peerCount
231+
accepted = putContentResult.acceptMetadata.acceptedCount
232+
alreadyStored = putContentResult.acceptMetadata.alreadyStoredCount
233+
notWithinRadius = putContentResult.acceptMetadata.notWithinRadiusCount
234+
genericDecline = putContentResult.acceptMetadata.genericDeclineCount
235+
rateLimited = putContentResult.acceptMetadata.rateLimitedCount
236+
transferInProgress =
237+
putContentResult.acceptMetadata.transferInProgressCount
238+
239+
logScope:
240+
contentKey = contentKeyHex
241+
242+
debug "Content gossiped",
243+
peers, accepted, genericDecline, alreadyStored, notWithinRadius,
244+
rateLimited, transferInProgress
245+
246+
# The conditions below assume correct and non-malicious behavior of the peers.
247+
if peers == genericDecline + rateLimited + transferInProgress:
248+
# No peers accepted or already stored the content.
249+
# Decline reasons are likely temporary, so retry.
250+
warn "All peers declined, rate limited, or transfer in progress; retrying...",
251+
contentKey = contentKeyHex
252+
# Sleep 5 seconds to back off a bit before retrying
253+
await sleepAsync(5.seconds)
254+
# Note i: might want to introduce exponential backoff here
255+
# Note ii: Because consecutive block numbers have consecutive content
256+
# ids until the hash function wraps around, it is likely that (some of) the same peers
257+
# will be selected for the next content and thus remain busy. A potential improvement
258+
# could be to stream content from multiple "content id ranges".
259+
continue
260+
261+
if peers == notWithinRadius:
262+
# No peers were found within radius. Retrying is unlikely to help,
263+
# as new searches probably won't find peers in radius. This is a
264+
# network-wide issue due to insufficient storage.
265+
warn "No peers were found within radius for content",
266+
contentKey = contentKeyHex
267+
break
268+
269+
if accepted + alreadyStored >= 1:
270+
# At least one peer either accepted or already has the content,
271+
# data should be in the network.
272+
debug "At least one peer accepted or already stored the content"
273+
break
274+
except CancelledError as e:
275+
trace "Cancelled gossipWorker"
276+
raise e
277+
except CatchableError as e:
278+
error "JSON-RPC portal_historyPutContent failed",
279+
error = $e.msg, contentKey = contentKeyHex
236280
except CancelledError:
237281
trace "gossipWorker canceled"
238282

0 commit comments

Comments
 (0)