feat: Add Dart, Scala, SCSS, SASS, Elixir, Lua, and Perl language detection (resolves #165)
This commit is contained in:
@@ -310,6 +310,67 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
|
||||
(r"\b_ready\s*\(", 4),
|
||||
(r"\b_process\s*\(", 4),
|
||||
],
|
||||
"dart": [
|
||||
(r"\bimport\s+['\"]package:", 5),
|
||||
(r"\bclass\s+\w+\s+extends\s+StatelessWidget", 5),
|
||||
(r"\bclass\s+\w+\s+extends\s+StatefulWidget", 5),
|
||||
(r"@override\b", 4),
|
||||
(r"\bWidget\s+build\s*\(", 5),
|
||||
(r"\bimport\s+['\"]dart:", 5),
|
||||
(r"\bfinal\s+\w+\s+\w+;", 4),
|
||||
(r"=>\s*\w+\(", 4),
|
||||
(r"\basync\s*\{", 3),
|
||||
(r"\bawait\s+", 3),
|
||||
(r"\bsetState\s*\(", 4),
|
||||
(r"\bvoid\s+main\s*\(", 3),
|
||||
],
|
||||
"scala": [
|
||||
(r"\bcase\s+class\s+\w+", 5),
|
||||
(r"\btrait\s+\w+", 5),
|
||||
(r"\bdef\s+\w+.*:\s*\w+", 5),
|
||||
(r"\bimport\s+scala\.", 4),
|
||||
(r"\bmatch\s*\{", 4),
|
||||
(r"\bval\s+\w+.*:\s*\w+", 4),
|
||||
(r"\bobject\s+\w+", 5),
|
||||
(r"=>", 3),
|
||||
(r"\bdef\s+\w+\[\w+\]", 4),
|
||||
(r"\bextends\s+\w+", 2),
|
||||
],
|
||||
"elixir": [
|
||||
(r"\bdefmodule\s+[A-Z]", 5),
|
||||
(r"\bdef\s+\w+\s+do\b", 5),
|
||||
(r"\bdefp\s+\w+", 5),
|
||||
(r"\|>", 5),
|
||||
(r"\buse\s+[A-Z]", 4),
|
||||
(r"\balias\s+[A-Z]", 4),
|
||||
(r"#\{", 4),
|
||||
(r"@[\w_]+", 3),
|
||||
(r"\bcase\s+\w+\s+do\b", 3),
|
||||
],
|
||||
"lua": [
|
||||
(r"\blocal\s+\w+\s*=", 5),
|
||||
(r"\.\.\.(?!\.)", 5),
|
||||
(r"\brepeat\b.*\buntil\b", 5),
|
||||
(r"~=", 4),
|
||||
(r"\belseif\b", 4),
|
||||
(r"\bthen\b", 3),
|
||||
(r"\bfunction\s+\w+\s*\(", 3),
|
||||
(r"\bend\b", 2),
|
||||
],
|
||||
"perl": [
|
||||
(r"\bmy\s+\$\w+", 5),
|
||||
(r"\buse\s+strict", 5),
|
||||
(r"\buse\s+warnings", 5),
|
||||
(r"\bsub\s+\w+\s*\{", 5),
|
||||
(r"\bchomp\s*\(", 5),
|
||||
(r"@\w+\s*=", 5),
|
||||
(r"%\w+\s*=", 5),
|
||||
(r"\$\w+\s*=~\s*/", 4),
|
||||
(r"\$[0-9]+", 4),
|
||||
(r"->", 3),
|
||||
],
|
||||
|
||||
|
||||
# ===== Markup/Config Languages =====
|
||||
"html": [
|
||||
(r"<!DOCTYPE\s+html>", 5),
|
||||
@@ -327,6 +388,28 @@ LANGUAGE_PATTERNS: dict[str, list[tuple[str, int]]] = {
|
||||
(r"#[\w-]+\s*\{", 2),
|
||||
(r"@import", 2),
|
||||
],
|
||||
"scss": [
|
||||
(r"\$[\w-]+\s*:", 5),
|
||||
(r"@mixin\s+[\w-]+", 5),
|
||||
(r"@include\s+[\w-]+", 5),
|
||||
(r"@extend\s+", 4),
|
||||
(r"@function\s+[\w-]+", 4),
|
||||
(r"&[:\.]", 4),
|
||||
(r"#\{", 4),
|
||||
(r"@import\s+['\"]", 3),
|
||||
(r"@if\s+", 5),
|
||||
(r"@for\s+", 5),
|
||||
(r"@each\s+", 5),
|
||||
],
|
||||
"sass": [
|
||||
(r"\$[\w-]+\s*:", 5),
|
||||
(r"=[\w-]+", 5),
|
||||
(r"\+[\w-]+", 5),
|
||||
(r"@for\s+.+\s+through\s+", 5),
|
||||
(r"@mixin\s+[\w-]+", 4),
|
||||
(r"@if\s+", 4),
|
||||
(r"^\s{2,}[\w-]+:", 3),
|
||||
],
|
||||
"json": [
|
||||
(r"^\s*\{", 3),
|
||||
(r"^\s*\[", 3),
|
||||
|
||||
@@ -119,6 +119,195 @@ class TestLanguageDetection(unittest.TestCase):
|
||||
self.assertGreaterEqual(confidence, 0.0)
|
||||
self.assertLessEqual(confidence, 1.0)
|
||||
|
||||
def test_detect_scss_with_confidence(self):
    """An SCSS snippet is reported as ``scss`` with confidence above 0.8."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    sample = """
$primary-color: #3498db;

@mixin border-radius($radius) {
  border-radius: $radius;
}

.button {
  color: $primary-color;
  @include border-radius(5px);

  &:hover {
    background: darken($primary-color, 10%);
  }
}
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "scss")
    self.assertGreater(score, 0.8)
|
||||
|
||||
def test_detect_dart_with_confidence(self):
    """A Flutter-style Dart snippet is reported as ``dart`` with confidence above 0.6."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    sample = """
import 'package:flutter/material.dart';

class MyApp extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      home: Text('Hello'),
    );
  }
}
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "dart")
    self.assertGreater(score, 0.6)
|
||||
|
||||
def test_detect_scala_with_confidence(self):
    """A Scala snippet is reported as ``scala`` with confidence above 0.7."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    sample = """
case class Person(name: String, age: Int)

object Main extends App {
  val person = Person("Alice", 30)
  person match {
    case Person(n, a) if a >= 18 => println(s"Adult: $n")
    case _ => println("Minor")
  }
}
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "scala")
    self.assertGreater(score, 0.7)
|
||||
|
||||
def test_detect_sass_with_confidence(self):
    """An indented-syntax SASS snippet is reported as ``sass`` with confidence above 0.8."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): SASS is indentation-sensitive and the sample's leading
    # whitespace was not recoverable from the diff view; confirm against the
    # repository copy.
    sample = """
$primary-color: #3498db

=border-radius($radius)
  border-radius: $radius

.button
  color: $primary-color
  +border-radius(5px)

  &:hover
    background: darken($primary-color, 10%)
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "sass")
    self.assertGreater(score, 0.8)
|
||||
|
||||
def test_detect_elixir_with_confidence(self):
    """An Elixir module snippet is reported as ``elixir`` with confidence above 0.8."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    sample = """
defmodule MyApp.User do
  def greet(name) do
    "Hello, #{name}"
  end

  defp calculate_age(birth_year) do
    2024 - birth_year
  end

  def process(data) do
    data
    |> String.trim()
    |> String.downcase()
    |> String.split(",")
  end
end
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "elixir")
    self.assertGreater(score, 0.8)
|
||||
|
||||
def test_detect_lua_with_confidence(self):
    """A Lua snippet is reported as ``lua`` with confidence above 0.7."""
    # Allocate the extractor without running __init__, then attach a detector.
    pdf_extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    pdf_extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    sample = """
local function calculate_sum(numbers)
    local total = 0
    for i = 1, #numbers do
        total = total + numbers[i]
    end
    return total
end

local items = {1, 2, 3, 4, 5}
local result = calculate_sum(items)
print("Sum: " .. result)
"""

    detected, score = pdf_extractor.detect_language_from_code(sample)
    self.assertEqual(detected, "lua")
    self.assertGreater(score, 0.7)
|
||||
|
||||
def test_detect_perl_with_confidence(self):
    """A Perl script snippet is reported as ``perl`` with confidence above 0.8."""
    # Allocate the extractor without running __init__, then attach a detector.
    extractor = self.PDFExtractor.__new__(self.PDFExtractor)
    from skill_seekers.cli.language_detector import LanguageDetector

    extractor.language_detector = LanguageDetector(min_confidence=0.15)

    # Raw string: the Perl regex below contains ``\w``, which is an invalid
    # escape sequence in a normal Python literal and triggers a
    # DeprecationWarning/SyntaxWarning. The resulting text is unchanged.
    # NOTE(review): leading whitespace inside the sample was not recoverable
    # from the diff view; confirm against the repository copy.
    code = r"""
#!/usr/bin/perl
use strict;
use warnings;

sub process_line {
    my $line = shift;
    chomp($line);

    if ($line =~ /^(\w+)=(\w+)$/) {
        my ($name, $value) = ($1, $2);
        return "$name has value $value";
    }
    return undef;
}

my @lines = ("foo=10", "bar=20");
foreach my $line (@lines) {
    my $result = process_line($line);
    print $result if defined $result;
}
"""

    language, confidence = extractor.detect_language_from_code(code)
    self.assertEqual(language, "perl")
    self.assertGreater(confidence, 0.8)
|
||||
|
||||
|
||||
class TestSyntaxValidation(unittest.TestCase):
|
||||
"""Test syntax validation for different languages"""
|
||||
@@ -315,7 +504,11 @@ class TestCodeBlockMerging(unittest.TestCase):
|
||||
{
|
||||
"page_number": 1,
|
||||
"code_samples": [
|
||||
{"code": "def hello():", "language": "python", "detection_method": "pattern"}
|
||||
{
|
||||
"code": "def hello():",
|
||||
"language": "python",
|
||||
"detection_method": "pattern",
|
||||
}
|
||||
],
|
||||
"code_blocks_count": 1,
|
||||
},
|
||||
@@ -346,7 +539,11 @@ class TestCodeBlockMerging(unittest.TestCase):
|
||||
{
|
||||
"page_number": 1,
|
||||
"code_samples": [
|
||||
{"code": "def foo():", "language": "python", "detection_method": "pattern"}
|
||||
{
|
||||
"code": "def foo():",
|
||||
"language": "python",
|
||||
"detection_method": "pattern",
|
||||
}
|
||||
],
|
||||
"code_blocks_count": 1,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user