diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py index f12448e..58e354e 100755 --- a/src/skill_seekers/cli/doc_scraper.py +++ b/src/skill_seekers/cli/doc_scraper.py @@ -170,7 +170,7 @@ class DocToSkillConverter: } try: - with open(self.checkpoint_file, 'w') as f: + with open(self.checkpoint_file, 'w', encoding='utf-8') as f: json.dump(checkpoint_data, f, indent=2) logger.info(" 💾 Checkpoint saved (%d pages)", self.pages_scraped) except Exception as e: @@ -183,7 +183,7 @@ class DocToSkillConverter: return try: - with open(self.checkpoint_file, 'r') as f: + with open(self.checkpoint_file, 'r', encoding='utf-8') as f: checkpoint_data = json.load(f) self.visited_urls = set(checkpoint_data["visited_urls"]) @@ -1307,7 +1307,7 @@ def load_config(config_path: str) -> Dict[str, Any]: 'react' """ try: - with open(config_path, 'r') as f: + with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) except json.JSONDecodeError as e: logger.error("❌ Error: Invalid JSON in config file: %s", config_path) @@ -1413,7 +1413,7 @@ def check_existing_data(name: str) -> Tuple[bool, int]: """ data_dir = f"output/{name}_data" if os.path.exists(data_dir) and os.path.exists(f"{data_dir}/summary.json"): - with open(f"{data_dir}/summary.json", 'r') as f: + with open(f"{data_dir}/summary.json", 'r', encoding='utf-8') as f: summary = json.load(f) return True, summary.get('total_pages', 0) return False, 0 diff --git a/src/skill_seekers/cli/github_scraper.py b/src/skill_seekers/cli/github_scraper.py index b33293c..db7a7e7 100644 --- a/src/skill_seekers/cli/github_scraper.py +++ b/src/skill_seekers/cli/github_scraper.py @@ -919,7 +919,7 @@ Examples: # Build config from args or file if args.config: - with open(args.config, 'r') as f: + with open(args.config, 'r', encoding='utf-8') as f: config = json.load(f) elif args.repo: config = { diff --git a/src/skill_seekers/cli/unified_scraper.py b/src/skill_seekers/cli/unified_scraper.py index 3e7a5c6..78bec51 100644 --- a/src/skill_seekers/cli/unified_scraper.py +++ b/src/skill_seekers/cli/unified_scraper.py @@ -131,7 +131,7 @@ class UnifiedScraper: # Write temporary config temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json') - with open(temp_config_path, 'w') as f: + with open(temp_config_path, 'w', encoding='utf-8') as f: json.dump(doc_config, f, indent=2) # Run doc_scraper as subprocess @@ -150,7 +150,7 @@ class UnifiedScraper: docs_data_file = f"output/{doc_config['name']}_data/summary.json" if os.path.exists(docs_data_file): - with open(docs_data_file, 'r') as f: + with open(docs_data_file, 'r', encoding='utf-8') as f: summary = json.load(f) self.scraped_data['documentation'] = { @@ -202,7 +202,7 @@ class UnifiedScraper: # Save data github_data_file = os.path.join(self.data_dir, 'github_data.json') - with open(github_data_file, 'w') as f: + with open(github_data_file, 'w', encoding='utf-8') as f: json.dump(github_data, f, indent=2, ensure_ascii=False) self.scraped_data['github'] = { @@ -236,7 +236,7 @@ class UnifiedScraper: # Save data pdf_data_file = os.path.join(self.data_dir, 'pdf_data.json') - with open(pdf_data_file, 'w') as f: + with open(pdf_data_file, 'w', encoding='utf-8') as f: json.dump(pdf_data, f, indent=2, ensure_ascii=False) self.scraped_data['pdf'] = { @@ -272,10 +272,10 @@ class UnifiedScraper: return [] # Load data files - with open(docs_data['data_file'], 'r') as f: + with open(docs_data['data_file'], 'r', encoding='utf-8') as f: docs_json = json.load(f) - with open(github_data['data_file'], 'r') as f: + with open(github_data['data_file'], 'r', encoding='utf-8') as f: github_json = json.load(f) # Detect conflicts @@ -322,10 +322,10 @@ class UnifiedScraper: github_data = self.scraped_data.get('github', {}) # Load data - with open(docs_data['data_file'], 'r') as f: + with open(docs_data['data_file'], 'r', encoding='utf-8') as f: docs_json = json.load(f) - with open(github_data['data_file'], 'r') as f: + with open(github_data['data_file'], 'r', encoding='utf-8') as f: github_json = json.load(f) # Choose merger @@ -339,7 +339,7 @@ class UnifiedScraper: # Save merged data merged_file = os.path.join(self.data_dir, 'merged_data.json') - with open(merged_file, 'w') as f: + with open(merged_file, 'w', encoding='utf-8') as f: json.dump(merged_data, f, indent=2, ensure_ascii=False) logger.info(f"✅ Merged data saved: {merged_file}") @@ -361,7 +361,7 @@ class UnifiedScraper: conflicts = [] conflicts_file = os.path.join(self.data_dir, 'conflicts.json') if os.path.exists(conflicts_file): - with open(conflicts_file, 'r') as f: + with open(conflicts_file, 'r', encoding='utf-8') as f: conflicts_data = json.load(f) conflicts = conflicts_data.get('conflicts', [])