Add pattern-based detection for Dart, Scala, Elixir, Lua, Perl, SCSS and Sass.

Adds (regex, int) entries for seven new languages to LANGUAGE_PATTERNS in
language_detector.py, adds one detection test per new language to
TestLanguageDetection, and reformats two code_samples fixtures in
TestCodeBlockMerging (one dict key per line, no value changes).

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line breaks were restored so that every hunk
matches the line counts in its header; indentation (including inside the
triple-quoted test snippets) is reconstructed by convention - confirm
against the working tree before applying.
NOTE(review): the html context line arrived as an empty pattern (r"") and is
presumed to be r"<!DOCTYPE\s+html>" - confirm against the target file.

diff --git a/src/skill_seekers/cli/language_detector.py b/src/skill_seekers/cli/language_detector.py
index ff1b1cf..5694d35 100644
--- a/src/skill_seekers/cli/language_detector.py
+++ b/src/skill_seekers/cli/language_detector.py
@@ -310,6 +310,67 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
         (r"\b_ready\s*\(", 4),
         (r"\b_process\s*\(", 4),
     ],
+    "dart": [
+        (r"\bimport\s+['\"]package:", 5),
+        (r"\bclass\s+\w+\s+extends\s+StatelessWidget", 5),
+        (r"\bclass\s+\w+\s+extends\s+StatefulWidget", 5),
+        (r"@override\b", 4),
+        (r"\bWidget\s+build\s*\(", 5),
+        (r"\bimport\s+['\"]dart:", 5),
+        (r"\bfinal\s+\w+\s+\w+;", 4),
+        (r"=>\s*\w+\(", 4),
+        (r"\basync\s*\{", 3),
+        (r"\bawait\s+", 3),
+        (r"\bsetState\s*\(", 4),
+        (r"\bvoid\s+main\s*\(", 3),
+    ],
+    "scala": [
+        (r"\bcase\s+class\s+\w+", 5),
+        (r"\btrait\s+\w+", 5),
+        (r"\bdef\s+\w+[^:]*:\s*\w+\s*=", 5),
+        (r"\bimport\s+scala\.", 4),
+        (r"\bmatch\s*\{", 4),
+        (r"\bval\s+\w+\s*:\s*\w+\s*=", 4),
+        (r"\bobject\s+\w+", 5),
+        (r"=>", 3),
+        (r"\bdef\s+\w+\[\w+\]", 4),
+        (r"\bextends\s+\w+", 2),
+    ],
+    "elixir": [
+        (r"\bdefmodule\s+[A-Z]", 5),
+        (r"\bdef\s+\w+\s+do\b", 5),
+        (r"\bdefp\s+\w+", 5),
+        (r"\|>", 5),
+        (r"\buse\s+[A-Z]", 4),
+        (r"\balias\s+[A-Z]", 4),
+        (r"#\{", 4),
+        (r"@[\w_]+", 3),
+        (r"\bcase\s+\w+\s+do\b", 3),
+    ],
+    "lua": [
+        (r"\blocal\s+\w+\s*=", 5),
+        (r"\.\.\.(?!\.)", 5),
+        (r"\brepeat\b.*\buntil\b", 5),
+        (r"~=", 4),
+        (r"\belseif\b", 4),
+        (r"\bthen\b", 3),
+        (r"\bfunction\s+\w+\s*\(", 3),
+        (r"\bend\b", 2),
+    ],
+    "perl": [
+        (r"\bmy\s+\$\w+", 5),
+        (r"\buse\s+strict", 5),
+        (r"\buse\s+warnings", 5),
+        (r"\bsub\s+\w+\s*\{", 5),
+        (r"\bchomp\s*\(", 5),
+        (r"@\w+\s*=", 5),
+        (r"%\w+\s*=", 5),
+        (r"\$\w+\s*=~\s*/", 4),
+        (r"\$[0-9]+", 4),
+        (r"->", 3),
+    ],
+
+
     # ===== Markup/Config Languages =====
     "html": [
         (r"<!DOCTYPE\s+html>", 5),
@@ -327,6 +388,28 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
         (r"#[\w-]+\s*\{", 2),
         (r"@import", 2),
     ],
+    "scss": [
+        (r"\$[\w-]+\s*:", 5),
+        (r"@mixin\s+[\w-]+", 5),
+        (r"@include\s+[\w-]+", 5),
+        (r"@extend\s+", 4),
+        (r"@function\s+[\w-]+", 4),
+        (r"&[:\.]", 4),
+        (r"#\{", 4),
+        (r"@import\s+['\"]", 3),
+        (r"@if\s+", 5),
+        (r"@for\s+", 5),
+        (r"@each\s+", 5),
+    ],
+    "sass": [
+        (r"\$[\w-]+\s*:", 5),
+        (r"=[\w-]+", 5),
+        (r"\+[\w-]+", 5),
+        (r"@for\s+.+\s+through\s+", 5),
+        (r"@mixin\s+[\w-]+", 4),
+        (r"@if\s+", 4),
+        (r"^\s{2,}[\w-]+:", 3),
+    ],
     "json": [
         (r"^\s*\{", 3),
         (r"^\s*\[", 3),
diff --git a/tests/test_pdf_extractor.py b/tests/test_pdf_extractor.py
index 95dc878..f8897c7 100644
--- a/tests/test_pdf_extractor.py
+++ b/tests/test_pdf_extractor.py
@@ -119,6 +119,195 @@ class TestLanguageDetection(unittest.TestCase):
         self.assertGreaterEqual(confidence, 0.0)
         self.assertLessEqual(confidence, 1.0)
 
+    def test_detect_scss_with_confidence(self):
+        """Test SCSS detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        $primary-color: #3498db;
+
+        @mixin border-radius($radius) {
+            border-radius: $radius;
+        }
+
+        .button {
+            color: $primary-color;
+            @include border-radius(5px);
+
+            &:hover {
+                background: darken($primary-color, 10%);
+            }
+        }
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "scss")
+        self.assertGreater(confidence, 0.8)
+
+    def test_detect_dart_with_confidence(self):
+        """Test Dart detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        import 'package:flutter/material.dart';
+
+        class MyApp extends StatelessWidget {
+          @override
+          Widget build(BuildContext context) {
+            return MaterialApp(
+              home: Text('Hello'),
+            );
+          }
+        }
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "dart")
+        self.assertGreater(confidence, 0.6)
+
+    def test_detect_scala_with_confidence(self):
+        """Test Scala detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        case class Person(name: String, age: Int)
+
+        object Main extends App {
+          val person = Person("Alice", 30)
+          person match {
+            case Person(n, a) if a >= 18 => println(s"Adult: $n")
+            case _ => println("Minor")
+          }
+        }
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "scala")
+        self.assertGreater(confidence, 0.7)
+
+    def test_detect_sass_with_confidence(self):
+        """Test SASS detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        $primary-color: #3498db
+
+        =border-radius($radius)
+          border-radius: $radius
+
+        .button
+          color: $primary-color
+          +border-radius(5px)
+
+          &:hover
+            background: darken($primary-color, 10%)
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "sass")
+        self.assertGreater(confidence, 0.8)
+
+    def test_detect_elixir_with_confidence(self):
+        """Test Elixir detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        defmodule MyApp.User do
+          def greet(name) do
+            "Hello, #{name}"
+          end
+
+          defp calculate_age(birth_year) do
+            2024 - birth_year
+          end
+
+          def process(data) do
+            data
+            |> String.trim()
+            |> String.downcase()
+            |> String.split(",")
+          end
+        end
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "elixir")
+        self.assertGreater(confidence, 0.8)
+
+    def test_detect_lua_with_confidence(self):
+        """Test Lua detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = """
+        local function calculate_sum(numbers)
+            local total = 0
+            for i = 1, #numbers do
+                total = total + numbers[i]
+            end
+            return total
+        end
+
+        local items = {1, 2, 3, 4, 5}
+        local result = calculate_sum(items)
+        print("Sum: " .. result)
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "lua")
+        self.assertGreater(confidence, 0.7)
+
+    def test_detect_perl_with_confidence(self):
+        """Test Perl detection"""
+        extractor = self.PDFExtractor.__new__(self.PDFExtractor)
+        from skill_seekers.cli.language_detector import LanguageDetector
+
+        extractor.language_detector = LanguageDetector(min_confidence=0.15)
+
+        code = r"""
+        #!/usr/bin/perl
+        use strict;
+        use warnings;
+
+        sub process_line {
+            my $line = shift;
+            chomp($line);
+
+            if ($line =~ /^(\w+)=(\w+)$/) {
+                my ($name, $value) = ($1, $2);
+                return "$name has value $value";
+            }
+            return undef;
+        }
+
+        my @lines = ("foo=10", "bar=20");
+        foreach my $line (@lines) {
+            my $result = process_line($line);
+            print $result if defined $result;
+        }
+        """
+
+        language, confidence = extractor.detect_language_from_code(code)
+        self.assertEqual(language, "perl")
+        self.assertGreater(confidence, 0.8)
+
 
 class TestSyntaxValidation(unittest.TestCase):
     """Test syntax validation for different languages"""
@@ -315,7 +504,11 @@ class TestCodeBlockMerging(unittest.TestCase):
             {
                 "page_number": 1,
                 "code_samples": [
-                    {"code": "def hello():", "language": "python", "detection_method": "pattern"}
+                    {
+                        "code": "def hello():",
+                        "language": "python",
+                        "detection_method": "pattern",
+                    }
                 ],
                 "code_blocks_count": 1,
             },
@@ -346,7 +539,11 @@ class TestCodeBlockMerging(unittest.TestCase):
             {
                 "page_number": 1,
                 "code_samples": [
-                    {"code": "def foo():", "language": "python", "detection_method": "pattern"}
+                    {
+                        "code": "def foo():",
+                        "language": "python",
+                        "detection_method": "pattern",
+                    }
                 ],
                 "code_blocks_count": 1,
             },