{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://julientalbot.com/labs/eval-case-v1.schema.json",
  "title": "eval-case v1",
  "description": "One reduced, replayable field case per JSONL line. Provenance is privacy-bound by design: no client dumps, no private messages, no secrets, no full local paths, no raw traces — logical surfaces and reduced causal structure only. Taxonomy: claim -> action -> evidence -> final claim.",
  "type": "object",
  "required": [
    "spec",
    "case_id",
    "family",
    "intent",
    "observed_state",
    "final_claim",
    "claim_supported",
    "human_risk",
    "oracle",
    "provenance"
  ],
  "properties": {
    "spec": {
      "const": "eval-case-v1"
    },
    "case_id": {
      "type": "string",
      "pattern": "^case/[0-9]{4}$"
    },
    "family": {
      "description": "Failure family: FCE = false completion after a tool boundary, RF = recovery failure (loop without adaptation), EMF = environment mapping failure, EGF = exit-gate failure (closure without post-state check), TBC = truthful blocker (positive control).",
      "enum": ["FCE", "RF", "EMF", "EGF", "TBC"]
    },
    "family_label": {
      "type": "string"
    },
    "positive_control": {
      "type": "boolean",
      "default": false
    },
    "intent": {
      "description": "The human obligation delegated to the agent.",
      "type": "string"
    },
    "environment": {
      "type": "object",
      "properties": {
        "surface": {
          "description": "Logical surface only — never a client name, hostname, or path.",
          "type": "string"
        },
        "runtime": {
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "initial_state": {
      "type": "string"
    },
    "action_observed": {
      "type": "string"
    },
    "observed_state": {
      "description": "The verifiable post-state of the execution environment.",
      "type": "string"
    },
    "final_claim": {
      "description": "What the agent asserted at the end of the loop.",
      "type": "string"
    },
    "claim_supported": {
      "description": "Whether the observable evidence supports the final claim. Invariant: final claim <= observable evidence.",
      "type": "boolean"
    },
    "human_risk": {
      "description": "What the human would wrongly believe or do if the claim were trusted.",
      "type": "string"
    },
    "oracle": {
      "type": "object",
      "required": ["description", "gate"],
      "properties": {
        "description": {
          "type": "string"
        },
        "requires": {
          "description": "Observable evidence required before the claim is allowed.",
          "type": "array",
          "items": { "type": "string" }
        },
        "gate": {
          "enum": ["block_final_claim", "require_evidence", "reinforce_behavior"]
        }
      },
      "additionalProperties": false
    },
    "replay": {
      "type": "object",
      "required": ["checkpoint", "result"],
      "properties": {
        "checkpoint": { "type": "string" },
        "result": { "enum": ["pass", "fail"] },
        "reason": { "type": "string" }
      },
      "additionalProperties": false
    },
    "provenance": {
      "type": "object",
      "required": ["capture", "anonymization", "source"],
      "properties": {
        "capture": {
          "const": "metadata-only"
        },
        "anonymization": {
          "type": "string"
        },
        "source": {
          "description": "Fleet surface class (dogfood, opt-in beta, controlled rerun) — never an identifying reference.",
          "type": "string"
        }
      },
      "additionalProperties": false
    }
  },
  "additionalProperties": false
}
