change max length

This commit is contained in:
Pablo Estevez
2026-01-17 17:48:15 +00:00
parent 97e597d9db
commit c33c6f9073
118 changed files with 3546 additions and 960 deletions

View File

@@ -406,7 +406,13 @@ class UnifiedScraper:
# Append to list instead of overwriting (multi-source support)
self.scraped_data["github"].append(
{"repo": repo, "repo_id": repo_id, "idx": idx, "data": github_data, "data_file": github_data_file}
{
"repo": repo,
"repo_id": repo_id,
"idx": idx,
"data": github_data,
"data_file": github_data_file,
}
)
# Build standalone SKILL.md for synthesis using GitHubToSkillConverter
@@ -433,7 +439,9 @@ class UnifiedScraper:
logger.info(f"📦 Moved GitHub output to cache: {cache_github_dir}")
if os.path.exists(github_data_file_path):
cache_github_data = os.path.join(self.data_dir, f"{github_config['name']}_github_data.json")
cache_github_data = os.path.join(
self.data_dir, f"{github_config['name']}_github_data.json"
)
if os.path.exists(cache_github_data):
os.remove(cache_github_data)
shutil.move(github_data_file_path, cache_github_data)
@@ -478,7 +486,13 @@ class UnifiedScraper:
# Append to list instead of overwriting
self.scraped_data["pdf"].append(
{"pdf_path": pdf_path, "pdf_id": pdf_id, "idx": idx, "data": pdf_data, "data_file": pdf_data_file}
{
"pdf_path": pdf_path,
"pdf_id": pdf_id,
"idx": idx,
"data": pdf_data,
"data_file": pdf_data_file,
}
)
# Build standalone SKILL.md for synthesis
@@ -611,12 +625,20 @@ class UnifiedScraper:
# Load C3.x outputs into memory
c3_data = {
"patterns": self._load_json(temp_output / "patterns" / "detected_patterns.json"),
"test_examples": self._load_json(temp_output / "test_examples" / "test_examples.json"),
"test_examples": self._load_json(
temp_output / "test_examples" / "test_examples.json"
),
"how_to_guides": self._load_guide_collection(temp_output / "tutorials"),
"config_patterns": self._load_json(temp_output / "config_patterns" / "config_patterns.json"),
"architecture": self._load_json(temp_output / "architecture" / "architectural_patterns.json"),
"config_patterns": self._load_json(
temp_output / "config_patterns" / "config_patterns.json"
),
"architecture": self._load_json(
temp_output / "architecture" / "architectural_patterns.json"
),
"api_reference": self._load_api_reference(temp_output / "api_reference"), # C2.5
"dependency_graph": self._load_json(temp_output / "dependencies" / "dependency_graph.json"), # C2.6
"dependency_graph": self._load_json(
temp_output / "dependencies" / "dependency_graph.json"
), # C2.6
}
# Log summary
@@ -769,7 +791,9 @@ class UnifiedScraper:
conflicts = conflicts_data.get("conflicts", [])
# Build skill
builder = UnifiedSkillBuilder(self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir)
builder = UnifiedSkillBuilder(
self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir
)
builder.build()
@@ -836,7 +860,10 @@ Examples:
parser.add_argument("--config", "-c", required=True, help="Path to unified config JSON file")
parser.add_argument(
"--merge-mode", "-m", choices=["rule-based", "claude-enhanced"], help="Override config merge mode"
"--merge-mode",
"-m",
choices=["rule-based", "claude-enhanced"],
help="Override config merge mode",
)
parser.add_argument(
"--skip-codebase-analysis",
@@ -854,7 +881,9 @@ Examples:
for source in scraper.config.get("sources", []):
if source["type"] == "github":
source["enable_codebase_analysis"] = False
logger.info(f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}")
logger.info(
f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}"
)
# Run scraper
scraper.run()