feat(utils): add retry utilities with exponential backoff (#208)

Add retry_with_backoff() and retry_with_backoff_async() for network operations.

Features:
- Configurable max attempts (default: 3)
- Exponential backoff with configurable base delay
- Operation name for meaningful log messages
- Both sync and async versions

Addresses E2.6: Add retry logic for network failures

Co-authored-by: Joseph Magly <1159087+jmagly@users.noreply.github.com>
This commit is contained in:
Joseph Magly
2025-12-21 14:31:38 -05:00
committed by GitHub
parent 65ded6c07c
commit 0d0eda7149
2 changed files with 234 additions and 2 deletions

View File

@@ -7,8 +7,14 @@ import os
import sys
import subprocess
import platform
import time
import logging
from pathlib import Path
from typing import Optional, Tuple, Dict, Union
from typing import Optional, Tuple, Dict, Union, TypeVar, Callable
logger = logging.getLogger(__name__)
T = TypeVar('T')
def open_folder(folder_path: Union[str, Path]) -> bool:
@@ -225,3 +231,113 @@ def read_reference_files(skill_dir: Union[str, Path], max_chars: int = 100000, p
break
return references
def retry_with_backoff(
    operation: Callable[[], T],
    max_attempts: int = 3,
    base_delay: float = 1.0,
    operation_name: str = "operation",
    exceptions: Tuple[type, ...] = (Exception,)
) -> T:
    """Retry an operation with exponential backoff.

    Useful for network operations that may fail due to transient errors.
    Waits progressively longer between retries (exponential backoff:
    base_delay, 2*base_delay, 4*base_delay, ...).

    Args:
        operation: Function to retry (takes no arguments, returns result)
        max_attempts: Maximum number of attempts, must be >= 1 (default: 3)
        base_delay: Base delay in seconds, doubles each retry (default: 1.0)
        operation_name: Name for logging purposes (default: "operation")
        exceptions: Exception types that trigger a retry (default: retry on
            any Exception). Exceptions not in this tuple propagate
            immediately without retrying.

    Returns:
        Result of successful operation

    Raises:
        ValueError: If max_attempts is less than 1.
        Exception: Last exception if all retries fail, or the first
            exception not listed in ``exceptions``.

    Example:
        >>> def fetch_page():
        ...     response = requests.get(url, timeout=30)
        ...     response.raise_for_status()
        ...     return response.text
        >>> content = retry_with_backoff(fetch_page, max_attempts=3, operation_name=f"fetch {url}")
    """
    # Reject bad arguments up front; with max_attempts <= 0 the loop body
    # would never run and we'd raise a confusing RuntimeError below.
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")
    last_exception: Optional[Exception] = None
    for attempt in range(1, max_attempts + 1):
        try:
            return operation()
        except exceptions as e:
            last_exception = e
            if attempt < max_attempts:
                # Exponential backoff: 1x, 2x, 4x, ... base_delay.
                delay = base_delay * (2 ** (attempt - 1))
                logger.warning(
                    "%s failed (attempt %d/%d), retrying in %.1fs: %s",
                    operation_name, attempt, max_attempts, delay, e
                )
                time.sleep(delay)
            else:
                logger.error(
                    "%s failed after %d attempts: %s",
                    operation_name, max_attempts, e
                )
    # This should always have a value, but mypy doesn't know that
    if last_exception is not None:
        raise last_exception
    raise RuntimeError(f"{operation_name} failed with no exception captured")
async def retry_with_backoff_async(
    operation: Callable[[], T],
    max_attempts: int = 3,
    base_delay: float = 1.0,
    operation_name: str = "operation",
    exceptions: Tuple[type, ...] = (Exception,)
) -> T:
    """Async version of retry_with_backoff for async operations.

    Args:
        operation: Async function to retry (takes no arguments, returns awaitable)
        max_attempts: Maximum number of attempts, must be >= 1 (default: 3)
        base_delay: Base delay in seconds, doubles each retry (default: 1.0)
        operation_name: Name for logging purposes (default: "operation")
        exceptions: Exception types that trigger a retry (default: retry on
            any Exception). Exceptions not in this tuple propagate
            immediately without retrying.

    Returns:
        Result of successful operation

    Raises:
        ValueError: If max_attempts is less than 1.
        Exception: Last exception if all retries fail, or the first
            exception not listed in ``exceptions``.

    Example:
        >>> async def fetch_page():
        ...     response = await client.get(url, timeout=30.0)
        ...     response.raise_for_status()
        ...     return response.text
        >>> content = await retry_with_backoff_async(fetch_page, operation_name=f"fetch {url}")
    """
    import asyncio
    # Reject bad arguments up front; with max_attempts <= 0 the loop body
    # would never run and we'd raise a confusing RuntimeError below.
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")
    last_exception: Optional[Exception] = None
    for attempt in range(1, max_attempts + 1):
        try:
            return await operation()
        except exceptions as e:
            last_exception = e
            if attempt < max_attempts:
                # Exponential backoff: 1x, 2x, 4x, ... base_delay.
                delay = base_delay * (2 ** (attempt - 1))
                logger.warning(
                    "%s failed (attempt %d/%d), retrying in %.1fs: %s",
                    operation_name, attempt, max_attempts, delay, e
                )
                # Non-blocking sleep so the event loop keeps running.
                await asyncio.sleep(delay)
            else:
                logger.error(
                    "%s failed after %d attempts: %s",
                    operation_name, max_attempts, e
                )
    if last_exception is not None:
        raise last_exception
    raise RuntimeError(f"{operation_name} failed with no exception captured")