"""xAI API client for X (Twitter) discovery.""" import json import re import sys from typing import Any, Dict, List, Optional from . import http def _log_error(msg: str): """Log error to stderr.""" sys.stderr.write(f"[X ERROR] {msg}\n") sys.stderr.flush() # xAI uses responses endpoint with Agent Tools API XAI_RESPONSES_URL = "https://api.x.ai/v1/responses" # Depth configurations: (min, max) posts to request DEPTH_CONFIG = { "quick": (8, 12), "default": (20, 30), "deep": (40, 60), } X_SEARCH_PROMPT = """You have access to real-time X (Twitter) data. Search for posts about: {topic} Focus on posts from {from_date} to {to_date}. Find {min_items}-{max_items} high-quality, relevant posts. IMPORTANT: Return ONLY valid JSON in this exact format, no other text: {{ "items": [ {{ "text": "Post text content (truncated if long)", "url": "https://x.com/user/status/...", "author_handle": "username", "date": "YYYY-MM-DD or null if unknown", "engagement": {{ "likes": 100, "reposts": 25, "replies": 15, "quotes": 5 }}, "why_relevant": "Brief explanation of relevance", "relevance": 0.85 }} ] }} Rules: - relevance is 0.0 to 1.0 (1.0 = highly relevant) - date must be YYYY-MM-DD format or null - engagement can be null if unknown - Include diverse voices/accounts if applicable - Prefer posts with substantive content, not just links""" def search_x( api_key: str, model: str, topic: str, from_date: str, to_date: str, depth: str = "default", mock_response: Optional[Dict] = None, ) -> Dict[str, Any]: """Search X for relevant posts using xAI API with live search. Args: api_key: xAI API key model: Model to use topic: Search topic from_date: Start date (YYYY-MM-DD) to_date: End date (YYYY-MM-DD) depth: Research depth - "quick", "default", or "deep" mock_response: Mock response for testing Returns: Raw API response """ if mock_response is not None: return mock_response min_items, max_items = DEPTH_CONFIG.get(depth, DEPTH_CONFIG["default"]) headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } # Adjust timeout based on depth (generous for API response time) timeout = 90 if depth == "quick" else 120 if depth == "default" else 180 # Use Agent Tools API with x_search tool payload = { "model": model, "tools": [ {"type": "x_search"} ], "input": [ { "role": "user", "content": X_SEARCH_PROMPT.format( topic=topic, from_date=from_date, to_date=to_date, min_items=min_items, max_items=max_items, ), } ], } return http.post(XAI_RESPONSES_URL, payload, headers=headers, timeout=timeout) def parse_x_response(response: Dict[str, Any]) -> List[Dict[str, Any]]: """Parse xAI response to extract X items. Args: response: Raw API response Returns: List of item dicts """ items = [] # Check for API errors first if "error" in response and response["error"]: error = response["error"] err_msg = error.get("message", str(error)) if isinstance(error, dict) else str(error) _log_error(f"xAI API error: {err_msg}") if http.DEBUG: _log_error(f"Full error response: {json.dumps(response, indent=2)[:1000]}") return items # Try to find the output text output_text = "" if "output" in response: output = response["output"] if isinstance(output, str): output_text = output elif isinstance(output, list): for item in output: if isinstance(item, dict): if item.get("type") == "message": content = item.get("content", []) for c in content: if isinstance(c, dict) and c.get("type") == "output_text": output_text = c.get("text", "") break elif "text" in item: output_text = item["text"] elif isinstance(item, str): output_text = item if output_text: break # Also check for choices (older format) if not output_text and "choices" in response: for choice in response["choices"]: if "message" in choice: output_text = choice["message"].get("content", "") break if not output_text: return items # Extract JSON from the response json_match = re.search(r'\{[\s\S]*"items"[\s\S]*\}', output_text) if json_match: try: data = json.loads(json_match.group()) items = data.get("items", []) except json.JSONDecodeError: pass # Validate and clean items clean_items = [] for i, item in enumerate(items): if not isinstance(item, dict): continue url = item.get("url", "") if not url: continue # Parse engagement engagement = None eng_raw = item.get("engagement") if isinstance(eng_raw, dict): engagement = { "likes": int(eng_raw.get("likes", 0)) if eng_raw.get("likes") else None, "reposts": int(eng_raw.get("reposts", 0)) if eng_raw.get("reposts") else None, "replies": int(eng_raw.get("replies", 0)) if eng_raw.get("replies") else None, "quotes": int(eng_raw.get("quotes", 0)) if eng_raw.get("quotes") else None, } clean_item = { "id": f"X{i+1}", "text": str(item.get("text", "")).strip()[:500], # Truncate long text "url": url, "author_handle": str(item.get("author_handle", "")).strip().lstrip("@"), "date": item.get("date"), "engagement": engagement, "why_relevant": str(item.get("why_relevant", "")).strip(), "relevance": min(1.0, max(0.0, float(item.get("relevance", 0.5)))), } # Validate date format if clean_item["date"]: if not re.match(r'^\d{4}-\d{2}-\d{2}$', str(clean_item["date"])): clean_item["date"] = None clean_items.append(clean_item) return clean_items