This commit is contained in:
Pablo Estevez
2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions

View File

@@ -8,7 +8,7 @@ across the CLI tools to improve maintainability and clarity.
# Default scraping limits
DEFAULT_RATE_LIMIT = 0.5  # seconds between requests
DEFAULT_MAX_PAGES = 500  # maximum pages to scrape
DEFAULT_CHECKPOINT_INTERVAL = 1000  # pages between checkpoints
DEFAULT_ASYNC_MODE = False  # use async mode for parallel scraping (opt-in)
@@ -26,7 +26,7 @@ CONTENT_MATCH_POINTS = 1 # points for content keyword match
# API-based enhancement limits (uses Anthropic API)
API_CONTENT_LIMIT = 100000  # max characters for API enhancement
API_PREVIEW_LIMIT = 40000  # max characters for preview

# Local enhancement limits (uses Claude Code Max)
LOCAL_CONTENT_LIMIT = 50000  # max characters for local enhancement
@@ -36,7 +36,7 @@ LOCAL_PREVIEW_LIMIT = 20000 # max characters for preview
# Estimation and discovery settings
DEFAULT_MAX_DISCOVERY = 1000  # default max pages to discover
DISCOVERY_THRESHOLD = 10000  # threshold for warnings
# ===== FILE LIMITS =====
@@ -48,25 +48,25 @@ MAX_CODE_BLOCKS_PER_PAGE = 5 # maximum code blocks to extract per page
# Names exported by a star-import of this module, grouped by purpose.
# Each name is listed exactly once.
__all__ = [
    # Scraping
    "DEFAULT_RATE_LIMIT",
    "DEFAULT_MAX_PAGES",
    "DEFAULT_CHECKPOINT_INTERVAL",
    "DEFAULT_ASYNC_MODE",
    "CONTENT_PREVIEW_LENGTH",
    "MAX_PAGES_WARNING_THRESHOLD",
    "MIN_CATEGORIZATION_SCORE",
    "URL_MATCH_POINTS",
    "TITLE_MATCH_POINTS",
    "CONTENT_MATCH_POINTS",
    # Enhancement
    "API_CONTENT_LIMIT",
    "API_PREVIEW_LIMIT",
    "LOCAL_CONTENT_LIMIT",
    "LOCAL_PREVIEW_LIMIT",
    # Estimation
    "DEFAULT_MAX_DISCOVERY",
    "DISCOVERY_THRESHOLD",
    # Limits
    "MAX_REFERENCE_FILES",
    "MAX_CODE_BLOCKS_PER_PAGE",
]