Move UML/ directory and Architecture.md from Docs/ to docs/. Rename Architecture.md to UML_ARCHITECTURE.md to avoid collision with existing docs/ARCHITECTURE.md (docs organization file). Update all references in README.md, CONTRIBUTING.md, CLAUDE.md, and the architecture file itself. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
425 lines
11 KiB
HTML
425 lines
11 KiB
HTML
<!DOCTYPE html>
<!-- Generated documentation page for the "Scrapers" UML package. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Scrapers</title>

  <!-- Web fonts are requested over HTTPS so browsers do not block them as
       mixed content when this page itself is served over HTTPS. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,700,300italic,400italic,700italic|Source+Code+Pro:300,400,700">

  <!-- Local stylesheets; order is preserved because later sheets may override earlier ones. -->
  <link rel="stylesheet" href="../assets/css/bootstrap.css">
  <link rel="stylesheet" href="../assets/css/jquery.bonsai.css">
  <link rel="stylesheet" href="../assets/css/main.css">
  <link rel="stylesheet" href="../assets/icon-font/icons.css">

  <!-- Scripts stay synchronous (no defer/async): the inline script at the end
       of the body calls imageMapResize() as soon as it is parsed. -->
  <script src="../assets/js/jquery-2.1.0.min.js"></script>
  <script src="../assets/js/bootstrap.js"></script>
  <script src="../assets/js/jquery.bonsai.js"></script>
  <script src="../assets/js/imageMapResizer.min.js"></script>
</head>
|
|
<body>
|
|
<div>
|
|
|
|
<!-- Name Title -->
<h1>Scrapers</h1>

<!-- Type and Stereotype: badge naming the element's type -->
<section style="margin-top: .5em;">
  <span class="alert alert-info">
    <!-- The icon is decorative; the type name follows as plain text. -->
    <span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>
    UMLPackage
  </span>
</section>
|
|
|
|
<!-- Path: linked chain Project :: Model :: Package, separated by "::" -->
<section style="margin-top: 10px">
  <!-- Icons are decorative; each link already carries the element name as text. -->
  <span class="label label-info"><a href="cf9c8b720f3815adeccaf3ef6e48c6c4.html"><span class="node-icon staruml-icon icon-Project" aria-hidden="true"></span>Skill Seekers</a></span>
  <span>::</span>
  <span class="label label-info"><a href="6a4361334e8b649314ed681b9e6798c3.html"><span class="node-icon staruml-icon icon-UMLModel" aria-hidden="true"></span>skill_seekers</a></span>
  <span>::</span>
  <span class="label label-info"><a href="9a6a2c65f0fecfbf94214fe5dacc11b2.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>Scrapers</a></span>
</section>
|
|
|
|
<!-- Diagram -->
|
|
|
|
|
|
|
|
<!-- Description: free-text documentation attached to the element -->
<section>
  <h3>Description</h3>
  <div>
    <!-- NOTE(review): the text says "17" but enumerates 16 source kinds; confirm the count against the model. -->
    <p>17 source-type scrapers that extract content from documentation websites, GitHub repos, PDFs, Word docs, EPUB, video, Jupyter notebooks, HTML, OpenAPI specs, AsciiDoc, PPTX, RSS, man pages, Confluence, Notion, and chat exports. Each scraper has a main() entry point and a *ToSkillConverter class.</p>
  </div>
</section>
|
|
|
|
|
|
<!-- Specification -->
|
|
|
|
|
|
|
|
<!-- Directed Relationship -->
|
|
|
|
|
|
|
|
<!-- Undirected Relationship -->
|
|
|
|
|
|
|
|
<!-- Classifier -->
|
|
|
|
|
|
|
|
<!-- Interface -->
|
|
|
|
|
|
|
|
<!-- Component -->
|
|
|
|
|
|
|
|
<!-- Node -->
|
|
|
|
|
|
|
|
<!-- Actor -->
|
|
|
|
|
|
|
|
<!-- Use Case -->
|
|
|
|
|
|
|
|
<!-- Template Parameters -->
|
|
|
|
|
|
|
|
<!-- Literals -->
|
|
|
|
|
|
|
|
<!-- Attributes -->
|
|
|
|
|
|
|
|
<!-- Operations -->
|
|
|
|
|
|
|
|
<!-- Receptions -->
|
|
|
|
|
|
|
|
<!-- Extension Points -->
|
|
|
|
|
|
|
|
<!-- Parameters -->
|
|
|
|
|
|
|
|
<!-- Diagrams: thumbnail cards, each linking to a diagram page -->
<section class="element-list">
  <h3>Diagrams</h3>
  <div>
    <div style="display: inline-block; width: 250px; vertical-align: top;">
      <div class="member">
        <a href="a47184837219aea9ccabc89e35d90216.html">
          <!-- Empty alt: the caption below is inside the same link and already
               names the destination, so a text alternative would be read twice. -->
          <img src="../diagrams/a47184837219aea9ccabc89e35d90216.svg" alt="">
          <div style="text-align: center; margin-top: 1em;">Scrapers</div>
        </a>
      </div>
    </div>
  </div>
</section>
|
|
|
|
|
|
<!-- Behavior -->
|
|
|
|
|
|
|
|
<!-- Action -->
|
|
|
|
|
|
|
|
<!-- Interaction -->
|
|
|
|
|
|
|
|
<!-- CombinedFragment -->
|
|
|
|
|
|
|
|
<!-- Activity -->
|
|
|
|
|
|
|
|
<!-- State Machine -->
|
|
|
|
|
|
|
|
<!-- State Machine -->
|
|
|
|
|
|
|
|
<!-- State -->
|
|
|
|
|
|
|
|
<!-- Vertex -->
|
|
|
|
|
|
|
|
<!-- Transition -->
|
|
|
|
|
|
|
|
<!-- Data Model (ERD) -->
|
|
|
|
|
|
|
|
<!-- Columns (ERD) -->
|
|
|
|
|
|
|
|
<!-- Related Entities (ERD) -->
|
|
|
|
|
|
|
|
<!-- Data Flows (DFD) -->
|
|
|
|
|
|
|
|
<!-- Flows (Flowchart) -->
|
|
|
|
|
|
|
|
<!-- Properties: name/value pairs of the element -->
<section>
  <h3>Properties</h3>
  <table class="table table-striped table-bordered">
    <tr>
      <!-- scope="col" ties each header to its column for assistive technology;
           the column width is set in CSS because the width attribute is obsolete on th. -->
      <th scope="col" style="width: 50%">Name</th>
      <th scope="col" style="width: 50%">Value</th>
    </tr>
    <tr>
      <td>name</td>
      <td>Scrapers</td>
    </tr>
    <tr>
      <td>stereotype</td>
      <!-- NOTE(review): the generator emitted the literal text "null"; presumably
           no stereotype is set. Left unchanged; confirm before altering. -->
      <td><span class="label label-info">null</span></td>
    </tr>
    <tr>
      <td>visibility</td>
      <td>public</td>
    </tr>
    <tr>
      <td>importedElements</td>
      <td>
      </td>
    </tr>
  </table>
</section>
|
|
|
|
<!-- Tags -->
|
|
|
|
|
|
|
|
<!-- Constraints, Dependencies, Dependants -->
<section class="element-list">
  <h3>Dependencies</h3>
  <ul class="nav nav-list">
    <!-- Icons are decorative; the link text names the package. -->
    <li><a href="4c3d4d95b54fee708dbaf4d3251d74d9.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>Utilities</a></li>
    <li><a href="136e9c05d710dc4dc71692abb7b4072c.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>Analysis</a></li>
  </ul>
</section>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- Dependants: linked packages listed under the "Dependants" heading -->
<section class="element-list">
  <h3>Dependants</h3>
  <ul class="nav nav-list">
    <!-- Icons are decorative; the link text names the package. -->
    <li><a href="b0fef7dc7755be4eaa15e0a7f4f12827.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>CLICore</a></li>
    <li><a href="9c817b13c52de62951ae09053c981616.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>Enhancement</a></li>
    <li><a href="d178003f7e8fee3d9635eb757e1bffa1.html"><span class="node-icon staruml-icon icon-UMLPackage" aria-hidden="true"></span>MCP</a></li>
  </ul>
</section>
|
|
|
|
|
|
|
|
|
|
<!-- Relationships: dependency links written as (source→target) -->
<section class="element-list">
  <h3>Relationships</h3>
  <ul class="nav nav-list">
    <!-- Icons are decorative; the link text already states both ends of the relationship. -->
    <li><a href="66c5db9de07c76e1b7444f602a1e28f8.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(CLICore→Scrapers)</a></li>
    <li><a href="a7e387e6836008e652c37a915d30c255.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(Enhancement→Scrapers)</a></li>
    <li><a href="5541c323f937abda33d6dadae4cedc53.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(MCP→Scrapers)</a></li>
    <li><a href="5aed048619037fd409f2563cc9ad7c15.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(Scrapers→Utilities)</a></li>
    <li><a href="ba72e8b427ca412c06af06c82d373fb9.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(Scrapers→Analysis)</a></li>
  </ul>
</section>
|
|
|
|
|
|
|
|
<!-- Owned Elements: the diagram, interface, classes and dependencies listed for this package -->
<section class="element-list">
  <h3>Owned Elements</h3>
  <ul class="nav nav-list">
    <!-- Icons are decorative; each link's text is the element name. -->
    <li><a href="a47184837219aea9ccabc89e35d90216.html"><span class="node-icon staruml-icon icon-UMLClassDiagram" aria-hidden="true"></span>Scrapers</a></li>
    <li><a href="bb5a0e821d35146502e601959f106ff6.html"><span class="node-icon staruml-icon icon-UMLInterface" aria-hidden="true"></span>IScraper</a></li>
    <li><a href="3af5ea4716a6bdcdb5593eb62d9fe016.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>DocToSkillConverter</a></li>
    <li><a href="f821b72afc0c59329dd76d9eb0aa9780.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>GitHubScraper</a></li>
    <li><a href="19fafbea28e5d5f773d66b64b077ad83.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>GitHubToSkillConverter</a></li>
    <li><a href="f404e5b8c71d89b9a74f9b872351ff0d.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>PDFToSkillConverter</a></li>
    <li><a href="02231b18c56eee1fe00db0b3cc88d3f3.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>WordToSkillConverter</a></li>
    <li><a href="50c94d49c4f3c594d8dd4cb8c67e2b7c.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>EpubToSkillConverter</a></li>
    <li><a href="d17d20c49ea8b49688427e3f2da2a5de.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>VideoToSkillConverter</a></li>
    <li><a href="1da40a6efac2b652728d1d5516dbc700.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>JupyterToSkillConverter</a></li>
    <li><a href="975393f28febd506c6f9a4bdd7d02929.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>HtmlToSkillConverter</a></li>
    <li><a href="5c7b162d10d20fa81377595dee3cb625.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>OpenAPIToSkillConverter</a></li>
    <li><a href="e8fa4d95e76893b117bcc6f54753011c.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>AsciiDocToSkillConverter</a></li>
    <li><a href="d151f121325ce00a55de3cfa14df0780.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>PptxToSkillConverter</a></li>
    <li><a href="78badaff41241c07ff29c19231333ae4.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>RssToSkillConverter</a></li>
    <li><a href="606976142b92c30df8a10a398571f41c.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>ManPageToSkillConverter</a></li>
    <li><a href="7a7a8d9d20d00f70db88f89eefdee448.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>ConfluenceToSkillConverter</a></li>
    <li><a href="a4606a82fa614707056585bc7b11c66b.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>NotionToSkillConverter</a></li>
    <li><a href="be653f12dac76fd779cd14c40bc893b1.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>ChatToSkillConverter</a></li>
    <li><a href="b1cc179750d7478da42c374e551b414d.html"><span class="node-icon staruml-icon icon-UMLClass" aria-hidden="true"></span>UnifiedScraper</a></li>
    <li><a href="5aed048619037fd409f2563cc9ad7c15.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(Scrapers→Utilities)</a></li>
    <li><a href="ba72e8b427ca412c06af06c82d373fb9.html"><span class="node-icon staruml-icon icon-UMLDependency" aria-hidden="true"></span>(Scrapers→Analysis)</a></li>
  </ul>
</section>
|
|
|
|
|
|
|
|
<!-- Diagrams: plain link list of diagram pages (separate from the thumbnail section earlier on the page) -->
<section class="element-list">
  <h3>Diagrams</h3>
  <ul class="nav nav-list">
    <!-- Icon is decorative; the link text names the diagram. -->
    <li><a href="e9f5fca281d9bc03348e2659e50d7169.html"><span class="node-icon staruml-icon icon-UMLPackageDiagram" aria-hidden="true"></span>Skill Seekers Architecture</a></li>
  </ul>
</section>
|
|
|
|
|
|
|
|
|
|
<!-- Image Map Resizer (https://github.com/davidjbradshaw/image-map-resizer) -->
<script>
  // imageMapResize is presumably defined by ../assets/js/imageMapResizer.min.js,
  // which the head loads. Guard the call so a missing or blocked script does
  // not raise a ReferenceError on this page.
  if (typeof imageMapResize === "function") {
    imageMapResize();
  }
</script>
|
|
</div>
|
|
</body>
|
|
</html>
|