{ "summary": { "suite_count": 3, "total_cases": 66, "family_count": 21, "false_positives": 0, "false_negatives": 0, "average_precision": 1.0, "average_recall": 1.0 }, "family_summary": { "workflow_to_skill": { "total": 5, "passed": 5, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "iterate_existing_skill": { "total": 5, "passed": 5, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "package_for_team": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "meta_skill_creation": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "paraphrase_trigger": { "total": 5, "passed": 5, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_trigger": { "total": 4, "passed": 4, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_only": { "total": 5, "passed": 5, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "summary_only": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "translate_only": { "total": 4, "passed": 4, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_only": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_summary_only": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_document_only": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "one_off_vs_reusable": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "brainstorm_vs_build": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_export_vs_agent_skill": { "total": 4, "passed": 4, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "partial_scaffold_not_full_skill": { "total": 4, "passed": 4, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_near_neighbor": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "complex_multi_asset": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "brainstorm_only": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_not_package": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "future_outline_vs_build": { "total": 4, "passed": 4, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 } }, "suites": { "train": { "threshold": 0.33, "threshold_explanation": "Prompts at or above the threshold are treated as trigger matches. Scores are driven primarily by semantic intent coverage: packaging intent, workflow-to-skill transformation intent, reuse/distribution intent, and eval intent. Explicit exclusions such as summary-only, translation-only, one-off, document-only, or do-not-build directives apply direct penalties and can override otherwise similar wording.", "false_positives": 0, "false_negatives": 0, "precision": 1.0, "recall": 1.0, "bucket_stats": { "should_trigger": { "total": 12, "passed": 12, "pass_rate": 1.0 }, "should_not_trigger": { "total": 11, "passed": 11, "pass_rate": 1.0 }, "near_neighbor": { "total": 8, "passed": 8, "pass_rate": 1.0 } }, "family_stats": { "workflow_to_skill": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "iterate_existing_skill": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "package_for_team": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "meta_skill_creation": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "paraphrase_trigger": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_trigger": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_only": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "summary_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "translate_only": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_only": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_summary_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_document_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "one_off_vs_reusable": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "brainstorm_vs_build": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_export_vs_agent_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "partial_scaffold_not_full_skill": { "total": 3, "passed": 3, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_near_neighbor": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 } }, "misfires": [], "results": { "should_trigger": [ { "prompt": "Create a skill from this repeated workflow.", "family": "workflow_to_skill", "score": 0.549, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.714, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "create a skill" ], "transform_workflow": [ "workflow" ] }, "negative": {} } } }, { "prompt": "Turn this runbook into a reusable agent skill.", "family": "workflow_to_skill", "score": 0.729, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.625, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "agent skill" ], "reuse_package": [ "reusable" ], "transform_workflow": [ "runbook" ] }, "negative": {} } } }, { "prompt": "Convert this process note into a reusable skill package for the team.", "family": "workflow_to_skill", "score": 0.766, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.709, "support_score": 0, "lexical_support": 0.667, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "reusable", "package" ], "team_operationalize": [ "team" ], "transform_workflow": [ "process" ] }, "negative": {} } } }, { "prompt": "Improve this skill description and add evals.", "family": "iterate_existing_skill", "score": 0.353, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "eval_optimize", "iterate_existing_skill" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.291, "support_score": 0, "lexical_support": 0.571, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "eval_optimize": [ "evals" ], "iterate_existing_skill": [ "this skill", "skill description" ] }, "negative": {} } }, "boundary_case": true }, { "prompt": "Refactor this prompt into a proper skill package.", "family": "iterate_existing_skill", "score": 0.532, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.5, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ] }, "negative": {} } } }, { "prompt": "Package this skill for team reuse.", "family": "package_for_team", "score": 0.448, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "iterate_existing_skill", "reuse_package", "team_operationalize" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.667, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "iterate_existing_skill": [ "this skill", "package this skill" ], "reuse_package": [ "reuse", "package", "team reuse" ], "team_operationalize": [ "team", "team reuse" ] }, "negative": {} } } }, { "prompt": "Create a meta-skill for packaging internal workflows.", "family": "meta_skill_creation", "score": 0.591, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "team_operationalize" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.527, "support_score": 0, "lexical_support": 0.571, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "meta skill", "meta-skill" ], "reuse_package": [ "packaging", "internal workflows" ], "team_operationalize": [ "internal" ] }, "negative": {} } } }, { "prompt": "Standardize this recurring workflow as a shareable skill package with references.", "family": "package_for_team", "score": 0.749, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.709, "support_score": 0.06, "lexical_support": 0.273, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "multi_asset": [ "references" ], "reuse_package": [ "package" ], "team_operationalize": [ "standardize" ], "transform_workflow": [ "workflow" ] }, "negative": {} } } }, { "prompt": "Codify this recurring support escalation workflow into a maintained skill for the ops library.", "family": "paraphrase_trigger", "score": 0.397, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.286, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "ops library", "maintained" ], "transform_workflow": [ "workflow" ] }, "negative": {} } }, "boundary_case": true }, { "prompt": "Operationalize this workflow as a reusable skill with validation notes and team library metadata.", "family": "paraphrase_trigger", "score": 0.88, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.855, "support_score": 0, "lexical_support": 0.429, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "operationalize this workflow as a reusable skill" ], "eval_optimize": [ "validation" ], "reuse_package": [ "reusable", "team library", "operationalize" ], "team_operationalize": [ "team" ], "transform_workflow": [ "workflow" ] }, "negative": {} } } }, { "prompt": "Below are release notes, rough notes, and a transcript from the last handoff. Ignore the noise and turn this runbook into a reusable agent skill with references and scripts so the team can reuse it every quarter.", "family": "long_context_trigger", "score": 0.756, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.709, "support_score": 0.06, "lexical_support": 0.355, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "agent skill" ], "multi_asset": [ "references", "scripts" ], "reuse_package": [ "reusable", "reuse" ], "team_operationalize": [ "team" ], "transform_workflow": [ "runbook", "rough notes", "transcript", "release notes" ] }, "negative": {} } } }, { "prompt": "We have a standard operating procedure, checklist, and prompt history scattered across docs. Formalize this process into a reusable capability, add validation, and package it for team reuse.", "family": "long_context_trigger", "score": 0.88, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.855, "support_score": 0, "lexical_support": 0.423, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "formalize this process into a reusable capability" ], "eval_optimize": [ "validation" ], "reuse_package": [ "reusable", "reuse", "package", "team reuse" ], "team_operationalize": [ "team", "team reuse" ], "transform_workflow": [ "process", "checklist", "prompt history", "standard operating procedure" ] }, "negative": {} } } } ], "should_not_trigger": [ { "prompt": "Explain what a workflow is.", "family": "explain_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only" ], "exclusive_negative_concepts": [ "explain_only" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.2, "negative_penalty": 0.26, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "workflow" ] }, "negative": { "explain_only": [ "explain what" ] } } } }, { "prompt": "Just explain what a skill is.", "family": "explain_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only" ], "exclusive_negative_concepts": [ "explain_only" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.333, "negative_penalty": 0.26, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "explain_only": [ "explain what", "just explain" ] } } } }, { "prompt": "Summarize this random note.", "family": "summary_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "summarize_only" ], "exclusive_negative_concepts": [ "summarize_only" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.0, "negative_penalty": 0.3, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "summarize_only": [ "summarize" ] } } } }, { "prompt": "Translate this README into Japanese.", "family": "translate_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "translate_only" ], "exclusive_negative_concepts": [ "translate_only" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.25, "negative_penalty": 0.53, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "document_only": [ "readme" ], "translate_only": [ "translate", "into japanese" ] } } } }, { "prompt": "Write a product headline for this landing page.", "family": "document_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only" ], "exclusive_negative_concepts": [], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.25, "negative_penalty": 0.18, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "document_only": [ "headline", "landing page" ] } } } }, { "prompt": "Draft a blog title for this article.", "family": "document_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only" ], "exclusive_negative_concepts": [], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.286, "negative_penalty": 0.18, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "document_only": [ "article", "blog title" ] } } } }, { "prompt": "Rewrite this paragraph more clearly, but do not package anything.", "family": "document_only", "score": 0.175, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.1, "negative_penalty": 0, "coverage_boost": 0.0, "concept_evidence": { "positive": { "reuse_package": [ "package" ] }, "negative": {} } } }, { "prompt": "Explain the difference between a runbook and a workflow.", "family": "explain_only", "score": 0.187, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.25, "negative_penalty": 0, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "workflow", "runbook" ] }, "negative": {} } } }, { "prompt": "Below is a long SOP, but I only want a summary for leadership and a recap of the main points. Do not build anything reusable.", "family": "long_context_summary_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive", "summarize_only" ], "exclusive_negative_concepts": [ "no_build_directive", "summarize_only" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.19, "negative_penalty": 0.72, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ], "transform_workflow": [ "sop" ] }, "negative": { "no_build_directive": [ "do not build" ], "summarize_only": [ "summary", "only want a summary", "recap" ] } } } }, { "prompt": "Keep this as documentation only for the knowledge base and rewrite it as a reference document.", "family": "long_context_document_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only" ], "exclusive_negative_concepts": [], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.2, "negative_penalty": 0.18, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "document_only": [ "document", "reference document", "knowledge base" ] } } } }, { "prompt": "Translate this long onboarding flow into French and keep it as documentation only with no skill package.", "family": "translate_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive", "translate_only" ], "exclusive_negative_concepts": [ "no_build_directive", "translate_only" ], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.235, "negative_penalty": 0.77, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ], "transform_workflow": [ "onboarding flow" ] }, "negative": { "no_build_directive": [ "no skill package", "keep it as documentation only" ], "translate_only": [ "translate", "into french" ] } } } } ], "near_neighbor": [ { "prompt": "Create a one-off prompt for this task.", "family": "one_off_vs_reusable", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "one_off_request" ], "exclusive_negative_concepts": [ "one_off_request" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.429, "negative_penalty": 0.24, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "one_off_request": [ "one-off" ] } } } }, { "prompt": "Write a custom answer for this request without creating a skill package.", "family": "one_off_vs_reusable", "score": 0.131, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "one_off_request" ], "exclusive_negative_concepts": [ "one_off_request" ], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.364, "negative_penalty": 0.24, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ] }, "negative": { "one_off_request": [ "custom answer", "for this request" ] } } } }, { "prompt": "Help me brainstorm process ideas without building a skill.", "family": "brainstorm_vs_build", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "brainstorm_only", "no_build_directive" ], "exclusive_negative_concepts": [ "brainstorm_only", "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.333, "negative_penalty": 0.66, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "process" ] }, "negative": { "brainstorm_only": [ "brainstorm", "ideas" ], "no_build_directive": [ "without building" ] } } } }, { "prompt": "Improve this README but do not turn it into a skill.", "family": "document_export_vs_agent_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.4, "negative_penalty": 0.6, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "document_only": [ "readme" ], "no_build_directive": [ "do not turn it into a skill" ] } } } }, { "prompt": "Make a checklist for this task, but not a reusable skill.", "family": "partial_scaffold_not_full_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.4, "negative_penalty": 0.42, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ], "transform_workflow": [ "checklist" ] }, "negative": { "no_build_directive": [ "not a reusable skill" ] } } } }, { "prompt": "Create a reusable-looking checklist, but keep it as a plain note instead of a skill.", "family": "partial_scaffold_not_full_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.231, "negative_penalty": 0.42, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ], "transform_workflow": [ "checklist" ] }, "negative": { "no_build_directive": [ "plain note", "keep it as a plain note" ] } } } }, { "prompt": "We may convert this workflow later, but for now keep it as documentation only and produce a reference document.", "family": "long_context_near_neighbor", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.158, "negative_penalty": 0.6, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "workflow" ] }, "negative": { "document_only": [ "document", "reference document" ], "no_build_directive": [ "keep it as documentation only" ] } } } }, { "prompt": "Draft a template only from this process and stop short of making a full skill package.", "family": "partial_scaffold_not_full_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.4, "negative_penalty": 0.6, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ], "transform_workflow": [ "process" ] }, "negative": { "document_only": [ "template only" ], "no_build_directive": [ "stop short of making a full skill" ] } } } } ] }, "comparison": { "baseline_false_positives": 0, "baseline_false_negatives": 1, "improved_false_positives": 0, "improved_false_negatives": 0, "false_positive_delta": 0, "false_negative_delta": -1, "baseline_precision": 1.0, "improved_precision": 1.0, "baseline_recall": 0.917, "improved_recall": 1.0 }, "returncode": 0 }, "dev": { "threshold": 0.33, "threshold_explanation": "Prompts at or above the threshold are treated as trigger matches. Scores are driven primarily by semantic intent coverage: packaging intent, workflow-to-skill transformation intent, reuse/distribution intent, and eval intent. Explicit exclusions such as summary-only, translation-only, one-off, document-only, or do-not-build directives apply direct penalties and can override otherwise similar wording.", "false_positives": 0, "false_negatives": 0, "precision": 1.0, "recall": 1.0, "bucket_stats": { "should_trigger": { "total": 8, "passed": 8, "pass_rate": 1.0 }, "should_not_trigger": { "total": 7, "passed": 7, "pass_rate": 1.0 }, "near_neighbor": { "total": 7, "passed": 7, "pass_rate": 1.0 } }, "family_stats": { "workflow_to_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "iterate_existing_skill": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "complex_multi_asset": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "paraphrase_trigger": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_trigger": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "brainstorm_only": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "translate_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "summary_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_document_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_summary_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_not_package": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "partial_scaffold_not_full_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_export_vs_agent_skill": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "future_outline_vs_build": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_near_neighbor": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 } }, "misfires": [], "results": { "should_trigger": [ { "prompt": "Convert this operations checklist into a reusable skill.", "family": "workflow_to_skill", "score": 0.415, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.5, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ], "transform_workflow": [ "checklist", "operations checklist" ] }, "negative": {} } } }, { "prompt": "Add trigger evals to this skill before sharing it with the team.", "family": "iterate_existing_skill", "score": 0.388, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.327, "support_score": 0, "lexical_support": 0.333, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "eval_optimize": [ "evals", "trigger evals" ], "iterate_existing_skill": [ "this skill", "sharing it with the team", "before sharing it with the team" ], "team_operationalize": [ "team", "sharing it with the team" ] }, "negative": {} } }, "boundary_case": true }, { "prompt": "We have a messy release runbook, export process, and a prompt history; turn all of that into one reusable skill package with evals and packaging checks.", "family": "complex_multi_asset", "score": 0.843, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.818, "support_score": 0, "lexical_support": 0.375, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package", "turn all of that into one reusable skill package" ], "eval_optimize": [ "evals", "packaging checks" ], "reuse_package": [ "reusable", "package", "packaging" ], "transform_workflow": [ "runbook", "process", "prompt history" ] }, "negative": {} } } }, { "prompt": "Turn these workflow fragments, transcripts, and prompts into one production-ready skill with scripts and references.", "family": "complex_multi_asset", "score": 0.418, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.364, "support_score": 0.06, "lexical_support": 0.357, "negative_penalty": 0, "coverage_boost": 0.04, "concept_evidence": { "positive": { "multi_asset": [ "references", "scripts" ], "reuse_package": [ "production-ready" ], "transform_workflow": [ "workflow", "workflow fragments" ] }, "negative": {} } } }, { "prompt": "Tighten the trigger boundary on this existing skill and prepare it for team distribution.", "family": "iterate_existing_skill", "score": 0.557, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "eval_optimize", "iterate_existing_skill", "reuse_package", "team_operationalize" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.509, "support_score": 0, "lexical_support": 0.357, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "eval_optimize": [ "trigger boundary" ], "iterate_existing_skill": [ "existing skill" ], "reuse_package": [ "distribution" ], "team_operationalize": [ "team", "distribution" ] }, "negative": {} } } }, { "prompt": "Turn the SOP, transcript, and cleanup notes into a maintained skill for the team library.", "family": "paraphrase_trigger", "score": 0.471, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.4, "support_score": 0, "lexical_support": 0.538, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "reuse_package": [ "team library", "maintained" ], "team_operationalize": [ "team" ], "transform_workflow": [ "sop", "transcript" ] }, "negative": {} } } }, { "prompt": "Codify this workflow into a shared capability package and validate the route boundary before distribution.", "family": "paraphrase_trigger", "score": 0.868, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.855, "support_score": 0, "lexical_support": 0.267, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "codify this workflow" ], "eval_optimize": [ "validate", "route boundary" ], "reuse_package": [ "package", "distribution", "shared capability" ], "team_operationalize": [ "shared", "distribution" ], "transform_workflow": [ "workflow" ] }, "negative": {} } } }, { "prompt": "After the long handoff notes below, build a skill package for team reuse. The materials include a transcript, rough notes, and an onboarding flow, and the result should include references, scripts, and validation.", "family": "long_context_trigger", "score": 0.888, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.855, "support_score": 0.06, "lexical_support": 0.333, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "build a skill", "skill package" ], "eval_optimize": [ "validation" ], "multi_asset": [ "references", "scripts" ], "reuse_package": [ "reuse", "package", "team reuse" ], "team_operationalize": [ "team", "team reuse" ], "transform_workflow": [ "rough notes", "transcript", "onboarding flow" ] }, "negative": {} } } } ], "should_not_trigger": [ { "prompt": "Give me ideas for improving our process.", "family": "brainstorm_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "brainstorm_only" ], "exclusive_negative_concepts": [ "brainstorm_only" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.286, "negative_penalty": 0.24, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "process" ] }, "negative": { "brainstorm_only": [ "ideas" ] } } } }, { "prompt": "Review this note and tell me what it means in plain English.", "family": "explain_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only" ], "exclusive_negative_concepts": [ "explain_only" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.083, "negative_penalty": 0.26, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "explain_only": [ "tell me what it means", "plain english" ] } } } }, { "prompt": "Help me brainstorm several ways to improve packaging, but do not generate any skill files.", "family": "brainstorm_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [ "brainstorm_only", "no_build_directive" ], "exclusive_negative_concepts": [ "brainstorm_only", "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0.06, "lexical_support": 0.2, "negative_penalty": 0.66, "coverage_boost": 0.0, "concept_evidence": { "positive": { "multi_asset": [ "files" ], "reuse_package": [ "packaging" ] }, "negative": { "brainstorm_only": [ "brainstorm", "improve packaging" ], "no_build_directive": [ "do not generate any skill files" ] } } } }, { "prompt": "Translate these notes into French and keep the structure exactly as-is.", "family": "translate_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "translate_only" ], "exclusive_negative_concepts": [ "translate_only" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.273, "negative_penalty": 0.35, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "translate_only": [ "translate", "into french" ] } } } }, { "prompt": "Summarize this workflow and list the main points only.", "family": "summary_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "summarize_only" ], "exclusive_negative_concepts": [ "summarize_only" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.111, "negative_penalty": 0.3, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "workflow" ] }, "negative": { "summarize_only": [ "summarize" ] } } } }, { "prompt": "This is a long incident write-up. Keep it as documentation only, explain it in simpler language, and do not convert it into a reusable capability.", "family": "long_context_document_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only", "no_build_directive" ], "exclusive_negative_concepts": [ "explain_only", "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.182, "negative_penalty": 0.68, "coverage_boost": 0.0, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ] }, "negative": { "explain_only": [ "explain it", "in simpler language" ], "no_build_directive": [ "do not convert it into a reusable capability", "keep it as documentation only" ] } } } }, { "prompt": "I pasted a very long process note. For now I only need a recap and a short summary, not a skill package.", "family": "long_context_summary_only", "score": 0.257, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "summarize_only" ], "exclusive_negative_concepts": [ "summarize_only" ], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.353, "negative_penalty": 0.3, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ], "transform_workflow": [ "process" ] }, "negative": { "summarize_only": [ "summary", "recap" ] } } }, "boundary_case": true } ], "near_neighbor": [ { "prompt": "Review this process note and explain it, no packaging needed.", "family": "explain_not_package", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only", "no_build_directive" ], "exclusive_negative_concepts": [ "explain_only", "no_build_directive" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.2, "negative_penalty": 0.68, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "packaging" ], "transform_workflow": [ "process" ] }, "negative": { "explain_only": [ "explain it" ], "no_build_directive": [ "no packaging needed" ] } } } }, { "prompt": "Turn this into a checklist and template, but stop short of making a full skill.", "family": "partial_scaffold_not_full_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.286, "negative_penalty": 0.42, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "checklist" ] }, "negative": { "no_build_directive": [ "stop short of making a full skill" ] } } } }, { "prompt": "Package this explanation as a document, not as an agent skill.", "family": "document_export_vs_agent_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.4, "negative_penalty": 0.6, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "agent skill" ], "reuse_package": [ "package" ] }, "negative": { "document_only": [ "document" ], "no_build_directive": [ "not as an agent skill" ] } } } }, { "prompt": "Make a future-ready outline for this skill idea, but do not build the package.", "family": "future_outline_vs_build", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "iterate_existing_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "future_outline", "no_build_directive" ], "exclusive_negative_concepts": [ "future_outline", "no_build_directive" ], "semantic_coverage": 0.327, "support_score": 0, "lexical_support": 0.286, "negative_penalty": 0.64, "coverage_boost": 0.04, "concept_evidence": { "positive": { "iterate_existing_skill": [ "this skill" ], "reuse_package": [ "package" ] }, "negative": { "future_outline": [ "future-ready outline" ], "no_build_directive": [ "do not build" ] } } } }, { "prompt": "Polish these notes into a reusable-looking document without turning them into an agent skill.", "family": "document_export_vs_agent_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.462, "negative_penalty": 0.6, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "agent skill" ], "reuse_package": [ "reusable" ] }, "negative": { "document_only": [ "document", "reusable-looking document" ], "no_build_directive": [ "without turning them into an agent skill" ] } } } }, { "prompt": "Turn the workflow below into a wiki-ready reference document and template only. We may build a skill later, but not now.", "family": "long_context_near_neighbor", "score": 0.328, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only" ], "exclusive_negative_concepts": [], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.2, "negative_penalty": 0.18, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "build a skill" ], "transform_workflow": [ "workflow" ] }, "negative": { "document_only": [ "document", "wiki", "reference document", "template only" ] } } }, "boundary_case": true }, { "prompt": "Shape this idea for later and produce a future-ready outline, but do not build the skill package yet.", "family": "future_outline_vs_build", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "future_outline", "no_build_directive" ], "exclusive_negative_concepts": [ "future_outline", "no_build_directive" ], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.278, "negative_penalty": 0.64, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ] }, "negative": { "future_outline": [ "for later", "future-ready outline" ], "no_build_directive": [ "do not build" ] } } } } ] }, "comparison": { "baseline_false_positives": 1, "baseline_false_negatives": 1, "improved_false_positives": 0, "improved_false_negatives": 0, "false_positive_delta": -1, "false_negative_delta": -1, "baseline_precision": 0.875, "improved_precision": 1.0, "baseline_recall": 0.875, "improved_recall": 1.0 }, "returncode": 0 }, "holdout": { "threshold": 0.33, "threshold_explanation": "Prompts at or above the threshold are treated as trigger matches. Scores are driven primarily by semantic intent coverage: packaging intent, workflow-to-skill transformation intent, reuse/distribution intent, and eval intent. Explicit exclusions such as summary-only, translation-only, one-off, document-only, or do-not-build directives apply direct penalties and can override otherwise similar wording.", "false_positives": 0, "false_negatives": 0, "precision": 1.0, "recall": 1.0, "bucket_stats": { "should_trigger": { "total": 5, "passed": 5, "pass_rate": 1.0 }, "should_not_trigger": { "total": 4, "passed": 4, "pass_rate": 1.0 }, "near_neighbor": { "total": 4, "passed": 4, "pass_rate": 1.0 } }, "family_stats": { "complex_multi_asset": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "iterate_existing_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "workflow_to_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "paraphrase_trigger": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_trigger": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "summary_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "explain_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_document_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "translate_only": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "future_outline_vs_build": { "total": 2, "passed": 2, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "document_export_vs_agent_skill": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 }, "long_context_near_neighbor": { "total": 1, "passed": 1, "false_positives": 0, "false_negatives": 0, "pass_rate": 1.0 } }, "misfires": [], "results": { "should_trigger": [ { "prompt": "Build a reusable skill from this long internal process note, and make sure it has references, scripts, and a trigger description.", "family": "complex_multi_asset", "score": 0.468, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.4, "support_score": 0.06, "lexical_support": 0.316, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "multi_asset": [ "references", "scripts" ], "reuse_package": [ "reusable" ], "team_operationalize": [ "internal" ], "transform_workflow": [ "process", "internal process note" ] }, "negative": {} } } }, { "prompt": "Take this existing skill draft, tighten the trigger boundary, add near-neighbor evals, and package it for distribution.", "family": "iterate_existing_skill", "score": 0.729, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "eval_optimize", "iterate_existing_skill", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.691, "support_score": 0, "lexical_support": 0.412, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "eval_optimize": [ "evals", "near-neighbor", "trigger boundary" ], "iterate_existing_skill": [ "existing skill" ], "reuse_package": [ "package", "distribution" ], "team_operationalize": [ "distribution" ], "transform_workflow": [ "existing skill draft" ] }, "negative": {} } } }, { "prompt": "Turn this recurring onboarding process into a reusable skill library entry with validation notes.", "family": "workflow_to_skill", "score": 0.563, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "eval_optimize", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.509, "support_score": 0, "lexical_support": 0.429, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "eval_optimize": [ "validation" ], "reuse_package": [ "reusable", "library entry" ], "transform_workflow": [ "process" ] }, "negative": {} } } }, { "prompt": "Codify this quarterly release routine into a reusable skill package with scripts, references, and packaging checks.", "family": "paraphrase_trigger", "score": 0.858, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "eval_optimize", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [ "multi_asset" ], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.818, "support_score": 0.06, "lexical_support": 0.375, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "eval_optimize": [ "packaging checks" ], "multi_asset": [ "references", "scripts" ], "reuse_package": [ "reusable", "package", "packaging" ], "transform_workflow": [ "quarterly release routine" ] }, "negative": {} } } }, { "prompt": "Below is a noisy transcript plus release notes and prompt history. Ignore the chatter and turn this playbook into an agent capability package for the team library.", "family": "long_context_trigger", "score": 0.744, "predicted_trigger": true, "expected_trigger": true, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "team_operationalize", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [], "exclusive_negative_concepts": [], "semantic_coverage": 0.709, "support_score": 0, "lexical_support": 0.4, "negative_penalty": 0, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "turn this playbook into an agent capability package" ], "reuse_package": [ "package", "team library" ], "team_operationalize": [ "team" ], "transform_workflow": [ "prompt history", "transcript", "release notes" ] }, "negative": {} } } } ], "should_not_trigger": [ { "prompt": "I pasted a long process description below, but I only want a summary, not a reusable skill.", "family": "summary_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive", "summarize_only" ], "exclusive_negative_concepts": [ "no_build_directive", "summarize_only" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.308, "negative_penalty": 0.72, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "reusable" ], "transform_workflow": [ "process" ] }, "negative": { "no_build_directive": [ "not a reusable skill" ], "summarize_only": [ "summary", "only want a summary" ] } } } }, { "prompt": "Explain this release checklist in simpler language; do not package it.", "family": "explain_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "explain_only", "no_build_directive" ], "exclusive_negative_concepts": [ "explain_only", "no_build_directive" ], "semantic_coverage": 0.364, "support_score": 0, "lexical_support": 0.091, "negative_penalty": 0.68, "coverage_boost": 0.04, "concept_evidence": { "positive": { "reuse_package": [ "package" ], "transform_workflow": [ "checklist" ] }, "negative": { "explain_only": [ "explain this", "in simpler language" ], "no_build_directive": [ "do not package it" ] } } } }, { "prompt": "Keep this release workflow as documentation only for the wiki and produce a reference document, not for agent execution.", "family": "long_context_document_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.182, "support_score": 0, "lexical_support": 0.222, "negative_penalty": 0.6, "coverage_boost": 0.0, "concept_evidence": { "positive": { "transform_workflow": [ "workflow" ] }, "negative": { "document_only": [ "document", "wiki", "reference document" ], "no_build_directive": [ "not for agent execution" ] } } } }, { "prompt": "Translate this long runbook into Russian and keep it as documentation only with no skill package.", "family": "translate_only", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "no_build_directive", "translate_only" ], "exclusive_negative_concepts": [ "no_build_directive", "translate_only" ], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.25, "negative_penalty": 0.77, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ], "transform_workflow": [ "runbook" ] }, "negative": { "no_build_directive": [ "no skill package", "keep it as documentation only" ], "translate_only": [ "translate", "into russian" ] } } } } ], "near_neighbor": [ { "prompt": "Help me shape an idea before we decide whether to build a skill.", "family": "future_outline_vs_build", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "brainstorm_only", "future_outline" ], "exclusive_negative_concepts": [ "brainstorm_only", "future_outline" ], "semantic_coverage": 0.309, "support_score": 0, "lexical_support": 0.308, "negative_penalty": 0.46, "coverage_boost": 0.0, "concept_evidence": { "positive": { "build_skill": [ "build a skill" ] }, "negative": { "brainstorm_only": [ "shape an idea" ], "future_outline": [ "before we decide whether to build" ] } } } }, { "prompt": "Create an outline for a possible future skill, but do not build the skill yet.", "family": "future_outline_vs_build", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [], "extra_positive_concepts": [], "matched_negative_concepts": [ "future_outline", "no_build_directive" ], "exclusive_negative_concepts": [ "future_outline", "no_build_directive" ], "semantic_coverage": 0.0, "support_score": 0, "lexical_support": 0.357, "negative_penalty": 0.64, "coverage_boost": 0.0, "concept_evidence": { "positive": {}, "negative": { "future_outline": [ "future skill", "possible future skill", "outline for a possible future skill" ], "no_build_directive": [ "do not build" ] } } } }, { "prompt": "Turn this into a document package for the wiki, not an agent skill.", "family": "document_export_vs_agent_skill", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.491, "support_score": 0, "lexical_support": 0.538, "negative_penalty": 0.6, "coverage_boost": 0.04, "concept_evidence": { "positive": { "build_skill": [ "agent skill" ], "reuse_package": [ "package" ] }, "negative": { "document_only": [ "document", "document package", "wiki" ], "no_build_directive": [ "not an agent skill" ] } } } }, { "prompt": "From the long workflow below, create a template only and a checklist for discussion only. No skill package.", "family": "long_context_near_neighbor", "score": 0.0, "predicted_trigger": false, "expected_trigger": false, "passed": true, "score_detail": { "mode": "semantic-intent", "desired_positive_concepts": [ "build_skill", "transform_workflow", "reuse_package", "eval_optimize", "iterate_existing_skill", "team_operationalize" ], "matched_desired_concepts": [ "build_skill", "reuse_package", "transform_workflow" ], "extra_positive_concepts": [], "matched_negative_concepts": [ "document_only", "no_build_directive" ], "exclusive_negative_concepts": [ "no_build_directive" ], "semantic_coverage": 0.673, "support_score": 0, "lexical_support": 0.375, "negative_penalty": 0.6, "coverage_boost": 0.06, "concept_evidence": { "positive": { "build_skill": [ "skill package" ], "reuse_package": [ "package" ], "transform_workflow": [ "workflow", "checklist" ] }, "negative": { "document_only": [ "template only" ], "no_build_directive": [ "no skill package", "discussion only" ] } } } } ] }, "comparison": { "baseline_false_positives": 0, "baseline_false_negatives": 0, "improved_false_positives": 0, "improved_false_negatives": 0, "false_positive_delta": 0, "false_negative_delta": 0, "baseline_precision": 1.0, "improved_precision": 1.0, "baseline_recall": 1.0, "improved_recall": 1.0 }, "returncode": 0 } } }