#!/usr/bin/env python3
"""
last30days - Research a topic from the last 30 days on Reddit + X.

Usage:
    python3 last30days.py [options]

Options:
    --mock          Use fixtures instead of real API calls
    --emit=MODE     Output mode: compact|json|md|context|path (default: compact)
    --sources=MODE  Source selection: auto|reddit|x|both (default: auto)
    --quick         Faster research with fewer sources (8-12 each)
    --deep          Comprehensive research with more sources (50-70 Reddit, 40-60 X)
    --debug         Enable verbose debug logging
"""

import argparse
import json
import os
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
from pathlib import Path

# Make the sibling `lib` package importable regardless of the caller's CWD.
SCRIPT_DIR = Path(__file__).parent.resolve()
sys.path.insert(0, str(SCRIPT_DIR))

from lib import (
    dates,
    dedupe,
    env,
    http,
    models,
    normalize,
    openai_reddit,
    reddit_enrich,
    render,
    schema,
    score,
    ui,
    websearch,
    xai_x,
)


def load_fixture(name: str) -> dict:
    """Load a JSON fixture from the sibling ``fixtures/`` directory.

    Args:
        name: Fixture filename (e.g. "openai_sample.json").

    Returns:
        Parsed JSON dict, or {} when the fixture file does not exist.
    """
    fixture_path = SCRIPT_DIR.parent / "fixtures" / name
    if fixture_path.exists():
        with open(fixture_path) as f:
            return json.load(f)
    return {}


def _search_reddit(
    topic: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str,
    mock: bool,
) -> tuple:
    """Search Reddit via OpenAI (runs in a worker thread).

    In mock mode the OpenAI response is loaded from a fixture instead of
    calling the API. If the first query yields fewer than 5 items, a single
    retry is attempted with a simplified "core subject" query and any new
    URLs are merged in.

    Args:
        topic: User-supplied research topic.
        config: Environment config; must contain "OPENAI_API_KEY" when not mocking.
        selected_models: Model selection; "openai" key names the model to use.
        from_date: Inclusive range start (date string).
        to_date: Inclusive range end (date string).
        depth: Search depth preset ("quick" | "default" | "deep").
        mock: When True, use fixtures and skip the retry pass.

    Returns:
        Tuple of (reddit_items, raw_openai, error). `error` is None on
        success; `raw_openai` holds the raw response (or {"error": ...}).
    """
    raw_openai = None
    reddit_error = None
    if mock:
        raw_openai = load_fixture("openai_sample.json")
    else:
        try:
            raw_openai = openai_reddit.search_reddit(
                config["OPENAI_API_KEY"],
                selected_models["openai"],
                topic,
                from_date,
                to_date,
                depth=depth,
            )
        except http.HTTPError as e:
            # Keep the raw error payload so it can be written to debug outputs.
            raw_openai = {"error": str(e)}
            reddit_error = f"API error: {e}"
        except Exception as e:
            raw_openai = {"error": str(e)}
            reddit_error = f"{type(e).__name__}: {e}"

    # Parse response (tolerates None by substituting an empty dict).
    reddit_items = openai_reddit.parse_reddit_response(raw_openai or {})

    # Quick retry with a simpler query if few results. Skipped in mock mode
    # and when the first call already failed.
    if len(reddit_items) < 5 and not mock and not reddit_error:
        core = openai_reddit._extract_core_subject(topic)
        # Only retry when simplification actually changed the query.
        if core.lower() != topic.lower():
            try:
                retry_raw = openai_reddit.search_reddit(
                    config["OPENAI_API_KEY"],
                    selected_models["openai"],
                    core,
                    from_date,
                    to_date,
                    depth=depth,
                )
                retry_items = openai_reddit.parse_reddit_response(retry_raw)
                # Add items not already found (dedupe by URL).
                existing_urls = {item.get("url") for item in reddit_items}
                for item in retry_items:
                    if item.get("url") not in existing_urls:
                        reddit_items.append(item)
            except Exception:
                # Retry is best-effort; keep the first-pass results on failure.
                pass

    return reddit_items, raw_openai, reddit_error


def _search_x(
    topic: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str,
    mock: bool,
) -> tuple:
    """Search X via xAI (runs in a worker thread).

    Mirrors `_search_reddit` but without the low-result retry pass. In mock
    mode the response is loaded from a fixture.

    Args:
        topic: User-supplied research topic.
        config: Environment config; must contain "XAI_API_KEY" when not mocking.
        selected_models: Model selection; "xai" key names the model to use.
        from_date: Inclusive range start (date string).
        to_date: Inclusive range end (date string).
        depth: Search depth preset ("quick" | "default" | "deep").
        mock: When True, use fixtures.

    Returns:
        Tuple of (x_items, raw_xai, error). `error` is None on success;
        `raw_xai` holds the raw response (or {"error": ...}).
    """
    raw_xai = None
    x_error = None
    if mock:
        raw_xai = load_fixture("xai_sample.json")
    else:
        try:
            raw_xai = xai_x.search_x(
                config["XAI_API_KEY"],
                selected_models["xai"],
                topic,
                from_date,
                to_date,
                depth=depth,
            )
        except http.HTTPError as e:
            raw_xai = {"error": str(e)}
            x_error = f"API error: {e}"
        except Exception as e:
            raw_xai = {"error": str(e)}
            x_error = f"{type(e).__name__}: {e}"

    # Parse response (tolerates None by substituting an empty dict).
    x_items = xai_x.parse_x_response(raw_xai or {})
    return x_items, raw_xai, x_error


def run_research(
    topic: str,
    sources: str,
    config: dict,
    selected_models: dict,
    from_date: str,
    to_date: str,
    depth: str = "default",
    mock: bool = False,
    progress: "ui.ProgressDisplay | None" = None,
) -> tuple:
    """Run the research pipeline.

    Reddit and X searches run in parallel on a two-thread pool; Reddit
    enrichment then runs sequentially with per-item error handling.

    Args:
        topic: User-supplied research topic.
        sources: Source mode string. Beyond the CLI choices, extended modes
            ("all", "web", "reddit-web", "x-web") are accepted — presumably
            produced by env.validate_sources when --include-web is set;
            verify against that module.
        config: Environment config dict with API keys.
        selected_models: Models chosen per provider ("openai", "xai").
        from_date: Inclusive range start (date string).
        to_date: Inclusive range end (date string).
        depth: Search depth preset ("quick" | "default" | "deep").
        mock: When True, all network calls are replaced by fixtures.
        progress: Optional progress display; every UI call is guarded so
            None disables all progress output.

    Returns:
        Tuple of (reddit_items, x_items, web_needed, raw_openai, raw_xai,
        raw_reddit_enriched, reddit_error, x_error)

    Note:
        web_needed is True when WebSearch should be performed by Claude.
        The script outputs a marker and Claude handles WebSearch in its
        session.
    """
    reddit_items = []
    x_items = []
    raw_openai = None
    raw_xai = None
    raw_reddit_enriched = []
    reddit_error = None
    x_error = None

    # Check if WebSearch is needed (always needed in web-only mode).
    web_needed = sources in ("all", "web", "reddit-web", "x-web")

    # Web-only mode: no API calls needed, Claude handles everything.
    if sources == "web":
        if progress:
            progress.start_web_only()
            progress.end_web_only()
        return reddit_items, x_items, True, raw_openai, raw_xai, raw_reddit_enriched, reddit_error, x_error

    # Determine which searches to run.
    run_reddit = sources in ("both", "reddit", "all", "reddit-web")
    run_x = sources in ("both", "x", "all", "x-web")

    # Run Reddit and X searches in parallel (one thread each).
    reddit_future = None
    x_future = None
    with ThreadPoolExecutor(max_workers=2) as executor:
        # Submit both searches before collecting either result so the two
        # API calls overlap.
        if run_reddit:
            if progress:
                progress.start_reddit()
            reddit_future = executor.submit(
                _search_reddit, topic, config, selected_models, from_date, to_date, depth, mock
            )
        if run_x:
            if progress:
                progress.start_x()
            x_future = executor.submit(
                _search_x, topic, config, selected_models, from_date, to_date, depth, mock
            )

        # Collect results. Worker functions already convert API failures
        # into (items, raw, error) tuples; this except is a second safety
        # net for unexpected failures surfacing through the future itself.
        if reddit_future:
            try:
                reddit_items, raw_openai, reddit_error = reddit_future.result()
                if reddit_error and progress:
                    progress.show_error(f"Reddit error: {reddit_error}")
            except Exception as e:
                reddit_error = f"{type(e).__name__}: {e}"
                if progress:
                    progress.show_error(f"Reddit error: {e}")
            if progress:
                progress.end_reddit(len(reddit_items))

        if x_future:
            try:
                x_items, raw_xai, x_error = x_future.result()
                if x_error and progress:
                    progress.show_error(f"X error: {x_error}")
            except Exception as e:
                x_error = f"{type(e).__name__}: {e}"
                if progress:
                    progress.show_error(f"X error: {e}")
            if progress:
                progress.end_x(len(x_items))

    # Enrich Reddit items with real data (sequential, but with error
    # handling per item so one bad thread does not abort the rest).
    if reddit_items:
        if progress:
            progress.start_reddit_enrich(1, len(reddit_items))
        for i, item in enumerate(reddit_items):
            # start_reddit_enrich already displayed item 1; only update for
            # subsequent items.
            if progress and i > 0:
                progress.update_reddit_enrich(i + 1, len(reddit_items))
            try:
                if mock:
                    mock_thread = load_fixture("reddit_thread_sample.json")
                    reddit_items[i] = reddit_enrich.enrich_reddit_item(item, mock_thread)
                else:
                    reddit_items[i] = reddit_enrich.enrich_reddit_item(item)
            except Exception as e:
                # Log but don't crash - keep the unenriched item.
                if progress:
                    progress.show_error(f"Enrich failed for {item.get('url', 'unknown')}: {e}")
            # Record the post-enrichment (or unchanged) item for raw output.
            raw_reddit_enriched.append(reddit_items[i])
        if progress:
            progress.end_reddit_enrich()

    return reddit_items, x_items, web_needed, raw_openai, raw_xai, raw_reddit_enriched, reddit_error, x_error


def main():
    """CLI entry point: parse args, run the pipeline, and emit output."""
    parser = argparse.ArgumentParser(
        description="Research a topic from the last 30 days on Reddit + X"
    )
    parser.add_argument("topic", nargs="?", help="Topic to research")
    parser.add_argument("--mock", action="store_true", help="Use fixtures")
    parser.add_argument(
        "--emit",
        choices=["compact", "json", "md", "context", "path"],
        default="compact",
        help="Output mode",
    )
    parser.add_argument(
        "--sources",
        choices=["auto", "reddit", "x", "both"],
        default="auto",
        help="Source selection",
    )
    parser.add_argument(
        "--quick",
        action="store_true",
        help="Faster research with fewer sources (8-12 each)",
    )
    parser.add_argument(
        "--deep",
        action="store_true",
        help="Comprehensive research with more sources (50-70 Reddit, 40-60 X)",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable verbose debug logging",
    )
    parser.add_argument(
        "--include-web",
        action="store_true",
        help="Include general web search alongside Reddit/X (lower weighted)",
    )
    args = parser.parse_args()

    # Enable debug logging if requested: set the env flag for child code and
    # flip the already-imported http module's DEBUG attribute directly.
    if args.debug:
        os.environ["LAST30DAYS_DEBUG"] = "1"
        # Re-import http to pick up debug flag
        from lib import http as http_module
        http_module.DEBUG = True

    # Determine depth (--quick and --deep are mutually exclusive).
    if args.quick and args.deep:
        print("Error: Cannot use both --quick and --deep", file=sys.stderr)
        sys.exit(1)
    elif args.quick:
        depth = "quick"
    elif args.deep:
        depth = "deep"
    else:
        depth = "default"

    if not args.topic:
        print("Error: Please provide a topic to research.", file=sys.stderr)
        print("Usage: python3 last30days.py [options]", file=sys.stderr)
        sys.exit(1)

    # Load config (API keys etc.) from the environment.
    config = env.get_config()

    # Check which sources the available keys can serve.
    available = env.get_available_sources(config)

    # Mock mode can work without keys.
    if args.mock:
        if args.sources == "auto":
            sources = "both"
        else:
            sources = args.sources
    else:
        # Validate requested sources against available keys. May return an
        # extended mode string (e.g. "reddit-web") — see run_research.
        sources, error = env.validate_sources(args.sources, available, args.include_web)
        if error:
            # If it's a warning about WebSearch fallback, print but continue.
            if "WebSearch fallback" in error:
                print(f"Note: {error}", file=sys.stderr)
            else:
                print(f"Error: {error}", file=sys.stderr)
                sys.exit(1)

    # Get date range (last 30 days).
    from_date, to_date = dates.get_date_range(30)

    # Check what keys are missing for promo messaging.
    missing_keys = env.get_missing_keys(config)

    # Initialize progress display.
    progress = ui.ProgressDisplay(args.topic, show_banner=True)

    # Show promo for missing keys BEFORE research.
    # NOTE(review): missing_keys is compared against the string 'none' —
    # presumably env.get_missing_keys returns a status string, not a list;
    # verify against lib/env.
    if missing_keys != 'none':
        progress.show_promo(missing_keys)

    # Select models.
    if args.mock:
        # Use mock models loaded from fixtures; placeholder keys are merged
        # under the real config so any real keys take precedence.
        mock_openai_models = load_fixture("models_openai_sample.json").get("data", [])
        mock_xai_models = load_fixture("models_xai_sample.json").get("data", [])
        selected_models = models.get_models(
            {
                "OPENAI_API_KEY": "mock",
                "XAI_API_KEY": "mock",
                **config,
            },
            mock_openai_models,
            mock_xai_models,
        )
    else:
        selected_models = models.get_models(config)

    # Determine mode string for the report.
    if sources == "all":
        mode = "all"  # reddit + x + web
    elif sources == "both":
        mode = "both"  # reddit + x
    elif sources == "reddit":
        mode = "reddit-only"
    elif sources == "reddit-web":
        mode = "reddit-web"
    elif sources == "x":
        mode = "x-only"
    elif sources == "x-web":
        mode = "x-web"
    elif sources == "web":
        mode = "web-only"
    else:
        mode = sources

    # Run research.
    reddit_items, x_items, web_needed, raw_openai, raw_xai, raw_reddit_enriched, reddit_error, x_error = run_research(
        args.topic,
        sources,
        config,
        selected_models,
        from_date,
        to_date,
        depth,
        args.mock,
        progress,
    )

    # Processing phase.
    progress.start_processing()

    # Normalize items.
    normalized_reddit = normalize.normalize_reddit_items(reddit_items, from_date, to_date)
    normalized_x = normalize.normalize_x_items(x_items, from_date, to_date)

    # Hard date filter: exclude items with verified dates outside the range.
    # This is the safety net - even if prompts let old content through,
    # this filters it.
    filtered_reddit = normalize.filter_by_date_range(normalized_reddit, from_date, to_date)
    filtered_x = normalize.filter_by_date_range(normalized_x, from_date, to_date)

    # Score items.
    scored_reddit = score.score_reddit_items(filtered_reddit)
    scored_x = score.score_x_items(filtered_x)

    # Sort items.
    sorted_reddit = score.sort_items(scored_reddit)
    sorted_x = score.sort_items(scored_x)

    # Dedupe items (after sorting, so the best-ranked duplicate survives —
    # TODO confirm dedupe keeps the first occurrence).
    deduped_reddit = dedupe.dedupe_reddit(sorted_reddit)
    deduped_x = dedupe.dedupe_x(sorted_x)

    progress.end_processing()

    # Create report.
    report = schema.create_report(
        args.topic,
        from_date,
        to_date,
        mode,
        selected_models.get("openai"),
        selected_models.get("xai"),
    )
    report.reddit = deduped_reddit
    report.x = deduped_x
    report.reddit_error = reddit_error
    report.x_error = x_error

    # Generate context snippet.
    report.context_snippet_md = render.render_context_snippet(report)

    # Write outputs (report plus raw API payloads for debugging).
    render.write_outputs(report, raw_openai, raw_xai, raw_reddit_enriched)

    # Show completion.
    if sources == "web":
        progress.show_web_only_complete()
    else:
        progress.show_complete(len(deduped_reddit), len(deduped_x))

    # Output result to stdout in the requested format.
    output_result(report, args.emit, web_needed, args.topic, from_date, to_date, missing_keys)


def output_result(
    report: schema.Report,
    emit_mode: str,
    web_needed: bool = False,
    topic: str = "",
    from_date: str = "",
    to_date: str = "",
    missing_keys: str = "none",
):
    """Output the result based on emit mode.

    Args:
        report: The assembled research report.
        emit_mode: One of "compact" | "json" | "md" | "context" | "path".
        web_needed: When True, append a WebSearch instruction banner for
            Claude after the main output.
        topic: Topic string, echoed in the WebSearch banner.
        from_date: Range start, echoed in the WebSearch banner.
        to_date: Range end, echoed in the WebSearch banner.
        missing_keys: Missing-key status forwarded to the compact renderer.
    """
    if emit_mode == "compact":
        print(render.render_compact(report, missing_keys=missing_keys))
    elif emit_mode == "json":
        print(json.dumps(report.to_dict(), indent=2))
    elif emit_mode == "md":
        print(render.render_full_report(report))
    elif emit_mode == "context":
        print(report.context_snippet_md)
    elif emit_mode == "path":
        print(render.get_context_path())

    # Output WebSearch instructions if needed.
    if web_needed:
        print("\n" + "="*60)
        print("### WEBSEARCH REQUIRED ###")
        print("="*60)
        print(f"Topic: {topic}")
        print(f"Date range: {from_date} to {to_date}")
        print("")
        print("Claude: Use your WebSearch tool to find 8-15 relevant web pages.")
        print("EXCLUDE: reddit.com, x.com, twitter.com (already covered above)")
        print("INCLUDE: blogs, docs, news, tutorials from the last 30 days")
        print("")
        print("After searching, synthesize WebSearch results WITH the Reddit/X")
        print("results above. WebSearch items should rank LOWER than comparable")
        print("Reddit/X items (they lack engagement metrics).")
        print("="*60)


if __name__ == "__main__":
    main()