feat: Add Dart, Scala, SCSS, SASS, Elixir, Lua, and Perl language detection (resolves #165)

This commit is contained in:
pawu
2026-02-01 15:15:30 +05:30
parent a16eee0e7f
commit 427ea176c6
2 changed files with 282 additions and 2 deletions

View File

@@ -310,6 +310,67 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
(r"\b_ready\s*\(", 4),
(r"\b_process\s*\(", 4),
],
"dart": [
(r"\bimport\s+['\"]package:", 5),
(r"\bclass\s+\w+\s+extends\s+StatelessWidget", 5),
(r"\bclass\s+\w+\s+extends\s+StatefulWidget", 5),
(r"@override\b", 4),
(r"\bWidget\s+build\s*\(", 5),
(r"\bimport\s+['\"]dart:", 5),
(r"\bfinal\s+\w+\s+\w+;", 4),
(r"=>\s*\w+\(", 4),
(r"\basync\s*\{", 3),
(r"\bawait\s+", 3),
(r"\bsetState\s*\(", 4),
(r"\bvoid\s+main\s*\(", 3),
],
"scala": [
(r"\bcase\s+class\s+\w+", 5),
(r"\btrait\s+\w+", 5),
(r"\bdef\s+\w+.*:\s*\w+", 5),
(r"\bimport\s+scala\.", 4),
(r"\bmatch\s*\{", 4),
(r"\bval\s+\w+.*:\s*\w+", 4),
(r"\bobject\s+\w+", 5),
(r"=>", 3),
(r"\bdef\s+\w+\[\w+\]", 4),
(r"\bextends\s+\w+", 2),
],
"elixir": [
(r"\bdefmodule\s+[A-Z]", 5),
(r"\bdef\s+\w+\s+do\b", 5),
(r"\bdefp\s+\w+", 5),
(r"\|>", 5),
(r"\buse\s+[A-Z]", 4),
(r"\balias\s+[A-Z]", 4),
(r"#\{", 4),
(r"@[\w_]+", 3),
(r"\bcase\s+\w+\s+do\b", 3),
],
"lua": [
(r"\blocal\s+\w+\s*=", 5),
(r"\.\.\.(?!\.)", 5),
(r"\brepeat\b.*\buntil\b", 5),
(r"~=", 4),
(r"\belseif\b", 4),
(r"\bthen\b", 3),
(r"\bfunction\s+\w+\s*\(", 3),
(r"\bend\b", 2),
],
"perl": [
(r"\bmy\s+\$\w+", 5),
(r"\buse\s+strict", 5),
(r"\buse\s+warnings", 5),
(r"\bsub\s+\w+\s*\{", 5),
(r"\bchomp\s*\(", 5),
(r"@\w+\s*=", 5),
(r"%\w+\s*=", 5),
(r"\$\w+\s*=~\s*/", 4),
(r"\$[0-9]+", 4),
(r"->", 3),
],
# ===== Markup/Config Languages =====
"html": [
(r"<!DOCTYPE\s+html>", 5),
@@ -327,6 +388,28 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
(r"#[\w-]+\s*\{", 2),
(r"@import", 2),
],
"scss": [
(r"\$[\w-]+\s*:", 5),
(r"@mixin\s+[\w-]+", 5),
(r"@include\s+[\w-]+", 5),
(r"@extend\s+", 4),
(r"@function\s+[\w-]+", 4),
(r"&[:\.]", 4),
(r"#\{", 4),
(r"@import\s+['\"]", 3),
(r"@if\s+", 5),
(r"@for\s+", 5),
(r"@each\s+", 5),
],
"sass": [
(r"\$[\w-]+\s*:", 5),
(r"=[\w-]+", 5),
(r"\+[\w-]+", 5),
(r"@for\s+.+\s+through\s+", 5),
(r"@mixin\s+[\w-]+", 4),
(r"@if\s+", 4),
(r"^\s{2,}[\w-]+:", 3),
],
"json": [
(r"^\s*\{", 3),
(r"^\s*\[", 3),

View File

@@ -119,6 +119,195 @@ class TestLanguageDetection(unittest.TestCase):
self.assertGreaterEqual(confidence, 0.0)
self.assertLessEqual(confidence, 1.0)
def test_detect_scss_with_confidence(self):
    """An SCSS snippet is reported as "scss" with confidence above 0.8."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # __new__ yields an instance without running __init__; only the
    # detector attribute assigned below is set up by this test.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample exercising variables, a mixin, @include and a parent selector.
    snippet = """
$primary-color: #3498db;
@mixin border-radius($radius) {
border-radius: $radius;
}
.button {
color: $primary-color;
@include border-radius(5px);
&:hover {
background: darken($primary-color, 10%);
}
}
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "scss")
    self.assertGreater(score, 0.8)
def test_detect_dart_with_confidence(self):
    """A Flutter-style Dart snippet is reported as "dart" with confidence above 0.6."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # Build the extractor without invoking __init__, then attach the
    # detector the test relies on.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample with a package import, a StatelessWidget subclass and build().
    snippet = """
import 'package:flutter/material.dart';
class MyApp extends StatelessWidget {
@override
Widget build(BuildContext context) {
return MaterialApp(
home: Text('Hello'),
);
}
}
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "dart")
    self.assertGreater(score, 0.6)
def test_detect_scala_with_confidence(self):
    """A Scala snippet is reported as "scala" with confidence above 0.7."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # Instance is created via __new__, so __init__ is not run; the
    # detector is the only attribute this test assigns.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample with a case class, an object and a pattern match.
    snippet = """
case class Person(name: String, age: Int)
object Main extends App {
val person = Person("Alice", 30)
person match {
case Person(n, a) if a >= 18 => println(s"Adult: $n")
case _ => println("Minor")
}
}
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "scala")
    self.assertGreater(score, 0.7)
def test_detect_sass_with_confidence(self):
    """A SASS snippet is reported as "sass" with confidence above 0.8."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # Skip __init__ by allocating through __new__ and wire in the
    # detector manually.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample using the "=" mixin definition and "+" mixin include shorthands.
    snippet = """
$primary-color: #3498db
=border-radius($radius)
border-radius: $radius
.button
color: $primary-color
+border-radius(5px)
&:hover
background: darken($primary-color, 10%)
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "sass")
    self.assertGreater(score, 0.8)
def test_detect_elixir_with_confidence(self):
    """An Elixir snippet is reported as "elixir" with confidence above 0.8."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # Allocate the extractor without running __init__ and give it the
    # detector used by detect_language_from_code.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample with defmodule, def/defp, string interpolation and pipes.
    snippet = """
defmodule MyApp.User do
def greet(name) do
"Hello, #{name}"
end
defp calculate_age(birth_year) do
2024 - birth_year
end
def process(data) do
data
|> String.trim()
|> String.downcase()
|> String.split(",")
end
end
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "elixir")
    self.assertGreater(score, 0.8)
def test_detect_lua_with_confidence(self):
    """A Lua snippet is reported as "lua" with confidence above 0.7."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # __new__ gives a bare instance (no __init__ call); the detector is
    # attached explicitly for this test.
    extractor_cls = self.PDFExtractor
    extractor = extractor_cls.__new__(extractor_cls)
    detector = LanguageDetector(min_confidence=0.15)
    extractor.language_detector = detector

    # Sample with local declarations, a numeric for loop and ".." concatenation.
    snippet = """
local function calculate_sum(numbers)
local total = 0
for i = 1, #numbers do
total = total + numbers[i]
end
return total
end
local items = {1, 2, 3, 4, 5}
local result = calculate_sum(items)
print("Sum: " .. result)
"""
    detected, score = extractor.detect_language_from_code(snippet)

    self.assertEqual(detected, "lua")
    self.assertGreater(score, 0.7)
def test_detect_perl_with_confidence(self):
    """A Perl snippet is reported as "perl" with confidence above 0.8."""
    from skill_seekers.cli.language_detector import LanguageDetector

    # __new__ creates the extractor without running __init__; only the
    # detector attribute assigned here is set up by this test.
    extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # Raw string: the Perl regex below contains "\w", which is an invalid
    # escape sequence in a regular Python string literal (SyntaxWarning on
    # Python 3.12+). The raw literal yields the same text without the warning.
    code = r"""
#!/usr/bin/perl
use strict;
use warnings;
sub process_line {
my $line = shift;
chomp($line);
if ($line =~ /^(\w+)=(\w+)$/) {
my ($name, $value) = ($1, $2);
return "$name has value $value";
}
return undef;
}
my @lines = ("foo=10", "bar=20");
foreach my $line (@lines) {
my $result = process_line($line);
print $result if defined $result;
}
"""
    language, confidence = extractor.detect_language_from_code(code)

    self.assertEqual(language, "perl")
    self.assertGreater(confidence, 0.8)
class TestSyntaxValidation(unittest.TestCase):
"""Test syntax validation for different languages"""
@@ -315,7 +504,11 @@ class TestCodeBlockMerging(unittest.TestCase):
{
"page_number": 1,
"code_samples": [
{"code": "def hello():", "language": "python", "detection_method": "pattern"}
{
"code": "def hello():",
"language": "python",
"detection_method": "pattern",
}
],
"code_blocks_count": 1,
},
@@ -346,7 +539,11 @@ class TestCodeBlockMerging(unittest.TestCase):
{
"page_number": 1,
"code_samples": [
{"code": "def foo():", "language": "python", "detection_method": "pattern"}
{
"code": "def foo():",
"language": "python",
"detection_method": "pattern",
}
],
"code_blocks_count": 1,
},