55import sys
66import threading
77import time
8+ from concurrent .futures import CancelledError
89from contextlib import contextmanager
910from dataclasses import dataclass
1011from pathlib import Path , PosixPath
@@ -177,14 +178,18 @@ def flash( # noqa: C901
177178 self .logger .info (f"Flash operation succeeded on attempt { attempt + 1 } " )
178179 break
179180 except Exception as e :
180- # Check if this is a retryable or non-retryable error (including sub-exceptions)
181- retryable_error = self ._get_retryable_error (e )
182- non_retryable_error = self ._get_non_retryable_error (e )
181+ # Categorize the exception as retryable or non-retryable
182+ categorized_error = self ._categorize_exception (e )
183183
184- if retryable_error is not None :
184+ if isinstance (categorized_error , FlashNonRetryableError ):
185+ # Non-retryable error, fail immediately
186+ self .logger .error (f"Flash operation failed with non-retryable error: { categorized_error } " )
187+ raise FlashError (f"Flash operation failed: { categorized_error } " ) from e
188+ else :
189+ # Retryable error
185190 if attempt < retries :
186191 self .logger .warning (
187- f"Flash attempt { attempt + 1 } failed with retryable error: { retryable_error } "
192+ f"Flash attempt { attempt + 1 } failed with retryable error: { categorized_error } "
188193 )
189194 self .logger .info (f"Retrying flash operation (attempt { attempt + 2 } /{ retries + 1 } )" )
190195 # Wait a bit before retrying
@@ -193,86 +198,88 @@ def flash( # noqa: C901
193198 else :
194199 self .logger .error (f"Flash operation failed after { retries + 1 } attempts" )
195200 raise FlashError (
196- f"Flash operation failed after { retries + 1 } attempts. Last error: { retryable_error } "
201+ f"Flash operation failed after { retries + 1 } attempts. Last error: { categorized_error } "
197202 ) from e
198- elif non_retryable_error is not None :
199- # Non-retryable error, fail immediately
200- self .logger .error (f"Flash operation failed with non-retryable error: { non_retryable_error } " )
201- raise FlashError (f"Flash operation failed: { non_retryable_error } " ) from e
202- else :
203- # Unexpected error, don't retry
204- self .logger .error (f"Flash operation failed with unexpected error: { e } " )
205- raise FlashError (f"Flash operation failed: { e } " ) from e
206203
207204
208205 total_time = time .time () - start_time
209206 # total time in minutes:seconds
210207 minutes , seconds = divmod (total_time , 60 )
211208 self .logger .info (f"Flashing completed in { int (minutes )} m { int (seconds ):02d} s" )
212209
213- def _get_retryable_error (self , exception : Exception ) -> FlashRetryableError | None :
214- """Find a retryable error in an exception (or any of its causes).
def _categorize_exception(self, exception: Exception) -> FlashRetryableError | FlashNonRetryableError:
    """Categorize an exception as retryable or non-retryable.

    The exception itself, the members of any ExceptionGroup, and the
    ``__cause__`` chain are all searched (via ``_find_exception_in_chain``).

    Priority:
        1. FlashNonRetryableError - returned as found (highest priority)
        2. FlashRetryableError - returned as found
        3. CancelledError - reported as a new FlashNonRetryableError, so a
           cancelled operation is not retried
        4. Anything else - logged with its traceback and wrapped in a new
           FlashRetryableError

    Args:
        exception: The exception to categorize

    Returns:
        FlashRetryableError or FlashNonRetryableError
    """
    # First pass: look for non-retryable errors (highest priority)
    non_retryable = self._find_exception_in_chain(exception, FlashNonRetryableError)
    if non_retryable is not None:
        return non_retryable

    # Second pass: look for retryable errors
    retryable = self._find_exception_in_chain(exception, FlashRetryableError)
    if retryable is not None:
        return retryable

    # Cancellation is non-retryable. Search the whole chain, not only the
    # top-level exception, so a CancelledError nested in an ExceptionGroup
    # or a cause chain is not mistaken for an unknown (retryable) error.
    cancelled = self._find_exception_in_chain(exception, CancelledError)
    if cancelled is not None:
        cancelled_error = FlashNonRetryableError("Operation cancelled")
        # Keep the link to the original cancellation for debugging.
        cancelled_error.__cause__ = cancelled
        return cancelled_error

    # Unknown exception - log full stack trace and wrap as retryable.
    # exc_info is passed explicitly so the traceback is that of `exception`
    # even if this method is called outside an active `except` block.
    self.logger.exception(
        f"Unknown exception encountered during flash operation, treating as retryable: "
        f"{type(exception).__name__}: {exception}",
        exc_info=exception,
    )
    wrapped_exception = FlashRetryableError(
        f"Unknown error occurred: {type(exception).__name__}: {exception}"
    )
    wrapped_exception.__cause__ = exception
    return wrapped_exception
232249
233- # Check the cause chain
234- current = getattr (exception , '__cause__' , None )
235- while current is not None :
236- if isinstance (current , FlashRetryableError ):
237- return current
238- # Also check if the cause is an ExceptionGroup
239- if hasattr (current , 'exceptions' ):
240- for sub_exc in current .exceptions :
241- result = self ._get_retryable_error (sub_exc )
242- if result is not None :
243- return result
244- current = getattr (current , '__cause__' , None )
245- return None
250+ def _find_exception_in_chain (self , exception : Exception , target_type : type ) -> Exception | None :
251+ """Find an exception of a specific type in an exception chain.
246252
247- def _get_non_retryable_error ( self , exception : Exception ) -> FlashNonRetryableError | None :
248- """Find a non-retryable error in an exception (or any of its causes) .
253+ Searches through the exception, its ExceptionGroup members (if any),
254+ and the cause chain recursively .
249255
250256 Args:
251- exception: The exception to check
257+ exception: The exception to search
258+ target_type: The exception type to search for
252259
253260 Returns:
254- The FlashNonRetryableError if found, None otherwise
261+ The found exception instance if found, None otherwise
255262 """
256263 # Check if this is an ExceptionGroup and look through its exceptions
257264 if hasattr (exception , 'exceptions' ):
258265 for sub_exc in exception .exceptions :
259- result = self ._get_non_retryable_error (sub_exc )
266+ result = self ._find_exception_in_chain (sub_exc , target_type )
260267 if result is not None :
261268 return result
262269
263270 # Check the current exception
264- if isinstance (exception , FlashNonRetryableError ):
271+ if isinstance (exception , target_type ):
265272 return exception
266273
267274 # Check the cause chain
268275 current = getattr (exception , '__cause__' , None )
269276 while current is not None :
270- if isinstance (current , FlashNonRetryableError ):
277+ if isinstance (current , target_type ):
271278 return current
272279 # Also check if the cause is an ExceptionGroup
273280 if hasattr (current , 'exceptions' ):
274281 for sub_exc in current .exceptions :
275- result = self ._get_non_retryable_error (sub_exc )
282+ result = self ._find_exception_in_chain (sub_exc , target_type )
276283 if result is not None :
277284 return result
278285 current = getattr (current , '__cause__' , None )
@@ -1156,7 +1163,7 @@ def base():
11561163 @click .option (
11571164 "--fls-version" ,
11581165 type = str ,
1159- default = "0.1.5 " , # TODO(majopela): set default to "" once fls is included in our images
1166+ default = "0.1.9 " , # TODO(majopela): set default to "" once fls is included in our images
11601167 help = "Download an specific fls version from the github releases" ,
11611168 )
11621169 @debug_console_option
0 commit comments