release: prepare v1.37.0 with excel-automation and capture-screen
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
Security scan passed
|
||||
Scanned at: 2025-12-11T22:24:55.327388
|
||||
Scanned at: 2026-03-02T20:00:16.607484
|
||||
Tool: gitleaks + pattern-based validation
|
||||
Content hash: d04b93ec8a47fa7b64a2d0ee9790997e5ecc212ddbfa4c2c58fddafa2424d49a
|
||||
Content hash: 058a48a82477727772269754ab2bae5bb1f575fc264a1e28f1a2cfad25656b95
|
||||
|
||||
@@ -440,7 +440,7 @@ tiaogaoren/
|
||||
└── results/
|
||||
```
|
||||
|
||||
**See:** `~/workspace/prompts/tiaogaoren/` for full implementation.
|
||||
**See:** `./tiaogaoren/` (example project root) for full implementation.
|
||||
|
||||
## Resources
|
||||
|
||||
|
||||
130
promptfoo-evaluation/scripts/metrics.py
Executable file
130
promptfoo-evaluation/scripts/metrics.py
Executable file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reusable assertion helpers for Promptfoo Python checks.
|
||||
|
||||
This module is referenced by examples in promptfoo-evaluation/SKILL.md.
|
||||
All functions return Promptfoo-compatible result dicts.
|
||||
"""
|
||||
|
||||
|
||||
def _coerce_text(output):
|
||||
"""Normalize Promptfoo output payloads into plain text."""
|
||||
if output is None:
|
||||
return ""
|
||||
if isinstance(output, str):
|
||||
return output
|
||||
if isinstance(output, dict):
|
||||
# Promptfoo often provides provider response objects.
|
||||
text = output.get("output") or output.get("content") or ""
|
||||
if isinstance(text, list):
|
||||
return "\n".join(str(x) for x in text)
|
||||
return str(text)
|
||||
return str(output)
|
||||
|
||||
|
||||
def _safe_vars(context):
|
||||
if isinstance(context, dict):
|
||||
vars_dict = context.get("vars")
|
||||
if isinstance(vars_dict, dict):
|
||||
return vars_dict
|
||||
return {}
|
||||
|
||||
|
||||
def get_assert(output, context):
    """Default assertion function used when no function name is provided.

    Looks up the expected text in ``context["vars"]`` under ``"expected"``
    and then ``"expected_text"``; the first one that is non-blank after
    ``str()`` and ``strip()`` wins. When an expectation is found, the
    assertion passes if it occurs as a substring of the output text. When
    none is found, the assertion only checks that the output is not blank.

    Args:
        output: Raw output payload; normalized with ``_coerce_text``.
        context: Assertion context; its vars are read via ``_safe_vars``.

    Returns:
        A dict with ``"pass"``, ``"score"`` (1.0 or 0.0), ``"reason"`` and
        ``"named_scores"`` keys.
    """
    text = _coerce_text(output)
    vars_dict = _safe_vars(context)

    expected = ""
    for key in ("expected", "expected_text"):
        value = vars_dict.get(key)
        # A null var means "not provided"; str(None) would otherwise turn
        # it into the literal text "None" and search the output for it.
        if value is None:
            continue
        expected = str(value).strip()
        if expected:
            break

    if not expected:
        has_text = bool(text.strip())
        score = 1.0 if has_text else 0.0
        return {
            "pass": has_text,
            "score": score,
            "reason": "No expected text provided; assertion checks non-empty output.",
            "named_scores": {"non_empty": score},
        }

    matched = expected in text
    score = 1.0 if matched else 0.0
    return {
        "pass": matched,
        "score": score,
        "reason": "Output contains expected text." if matched else "Expected text not found.",
        "named_scores": {"contains_expected": score},
    }
|
||||
|
||||
|
||||
def custom_assert(output, context):
    """Delegate to ``get_assert``; this is the name SKILL.md examples use."""
    result = get_assert(output, context)
    return result
|
||||
|
||||
|
||||
def custom_check(output, context):
    """Check response length against min/max word constraints.

    The bounds come from ``context["vars"]``: ``min_words`` (default 100)
    and ``max_words`` (default 500), each converted with ``int()``. Words
    are the whitespace-separated tokens of the normalized output text.

    Scoring:
        * empty output -> fail, score 0.0
        * count within ``[min_words, max_words]`` -> pass, score 1.0
        * too short -> fail, score ``count / min_words``
        * too long -> fail, score ``1 - overflow / max_words`` floored at 0

    Args:
        output: Raw output payload; normalized with ``_coerce_text``.
        context: Assertion context; its vars are read via ``_safe_vars``.

    Returns:
        A dict with ``"pass"``, ``"score"`` (rounded to 3 decimals),
        ``"reason"`` and ``"named_scores"`` (key ``"length"``).

    Raises:
        ValueError, TypeError: if a configured bound cannot be converted
            by ``int()``.
    """
    text = _coerce_text(output)
    vars_dict = _safe_vars(context)

    min_words = int(vars_dict.get("min_words", 100))
    max_words = int(vars_dict.get("max_words", 500))
    # str.split() with no separator never yields empty strings, so the
    # token count needs no extra filtering.
    count = len(text.split())

    if count == 0:
        passed, score, reason = False, 0.0, "Output is empty."
    elif min_words <= count <= max_words:
        passed, score, reason = True, 1.0, "Word count within configured range."
    elif count < min_words:
        # count >= 1 here, so min_words >= 2 and the division is safe.
        passed = False
        score = max(0.0, count / float(min_words))
        reason = "Word count below minimum."
    else:
        # Reaching this branch means count > max_words, so overflow >= 1.
        overflow = count - max_words
        # Guard the divisor: max_words == 0 used to raise ZeroDivisionError
        # and a negative max_words produced scores above 1.0.
        divisor = float(max(1, max_words))
        passed = False
        score = min(1.0, max(0.0, 1.0 - (overflow / divisor)))
        reason = "Word count above maximum."

    rounded = round(score, 3)
    return {
        "pass": passed,
        "score": rounded,
        "reason": reason,
        "named_scores": {"length": rounded},
    }
|
||||
|
||||
|
||||
def check_length(output, context):
    """Character-length assertion used by advanced examples.

    Reads ``min_chars`` (default 1) and ``max_chars`` (default 3000) from
    the context vars, converts each with ``int()``, and compares them with
    the length of the normalized output text. The result passes when the
    length lies inside the inclusive range; otherwise the score is a
    partial-credit ratio floored at 0.0. Scores are rounded to 3 decimals.
    """
    text = _coerce_text(output)
    vars_dict = _safe_vars(context)

    lower = int(vars_dict.get("min_chars", 1))
    upper = int(vars_dict.get("max_chars", 3000))
    size = len(text)

    within = lower <= size <= upper
    if within:
        ratio = 1.0
    elif size < lower:
        # Too short: fraction of the minimum reached (divisor kept >= 1).
        ratio = max(0.0, size / float(max(1, lower)))
    else:
        # Too long: upper + (size - upper) is just size, so this is
        # upper / size.
        ratio = max(0.0, upper / float(size))

    rounded = round(ratio, 3)
    return {
        "pass": within,
        "score": rounded,
        "reason": "Character length check.",
        "named_scores": {"char_length": rounded},
    }
|
||||
Reference in New Issue
Block a user