{
  "version": "1.0.0",
  "generated": "2026-03-31",
  "max_score_canaries": 16,
  "max_score_qualitative": 4,
  "max_score_total": 20,
  "scoring_principle": "Each test measures what the agent's web fetch pipeline delivers automatically. The scoring form at /score/ evaluates canary tokens (16 points). Qualitative points (SPA shell awareness, soft 404 identification, broken code fence detection, and platform identification in the header-quality test) are assessed by reviewing the agent's task responses (4 points). Total: 20 points.",
  "tests": {
    "truncation": {
      "description": "Tests at what point agent truncation kicks in by placing canary tokens at measured character positions in a ~150K-character page.",
      "spec_checks": ["page-size-html", "page-size-markdown"],
      "canaries": [
        {
          "token": "CANARY-TRUNC-10K-fox",
          "approximate_position": "10K chars",
          "points": 1,
          "notes": "Early in the page. All agents should find this."
        },
        {
          "token": "CANARY-TRUNC-40K-river",
          "approximate_position": "40K chars",
          "points": 1,
          "notes": "Well within most truncation limits."
        },
        {
          "token": "CANARY-TRUNC-75K-summit",
          "approximate_position": "75K chars",
          "points": 1,
          "notes": "Past some aggressive truncation limits."
        },
        {
          "token": "CANARY-TRUNC-100K-glacier",
          "approximate_position": "100K chars",
          "points": 1,
          "notes": "At or near Claude Code's ~100K truncation limit."
        },
        {
          "token": "CANARY-TRUNC-130K-aurora",
          "approximate_position": "130K chars",
          "points": 1,
          "notes": "Past most known agent truncation limits."
        }
      ],
      "max_points": 5
    },
    "boilerplate-burial": {
      "description": "Tests whether agents can find content buried after ~80K characters of inline CSS.",
      "spec_checks": ["content-start-position", "page-size-html"],
      "canaries": [
        {
          "token": "CANARY-BOILERPLATE-CSS-nebula",
          "location": "Inside a CSS comment in the <style> block",
          "points": 0,
          "notes": "Not scored: finding this canary in the CSS is not meaningful on its own. It is used to check whether the agent reports CSS content as documentation."
        },
        {
          "token": "CANARY-BOILERPLATE-BODY-comet",
          "location": "In the actual documentation body content",
          "points": 1,
          "notes": "The meaningful canary. Did the agent reach the real content past the CSS?"
        }
      ],
      "max_points": 1
    },
    "spa-shell": {
      "description": "Tests whether agents see client-side rendered content or just the static shell.",
      "spec_checks": ["rendering-strategy"],
      "canaries": [
        {
          "token": "CANARY-SPA-STATIC-orbit",
          "location": "In the static navigation header HTML",
          "points": 0,
          "notes": "Present in static HTML. Finding this is expected but not scored."
        },
        {
          "token": "CANARY-SPA-JSONLY-prism",
          "location": "In content injected by JavaScript after DOMContentLoaded",
          "points": 1,
          "notes": "Only visible if the agent executes JavaScript. Most agents will NOT find this. Finding this canary by reading the app.js source file (rather than by receiving rendered page content) does not count. The test measures whether the agent's pipeline renders JavaScript, not whether the agent can locate and read .js files."
        }
      ],
      "qualitative": {
        "question": "Did the agent report that the page appeared to lack real documentation content?",
        "correct_answer": "The static HTML contains only navigation chrome and a 'Loading documentation...' message. An agent that recognizes this as a shell (not real docs) demonstrates awareness of the SPA problem.",
        "points": 1
      },
      "max_points": 2
    },
    "tabbed-content": {
      "description": "Tests whether agents can read all language variants in serialized tabbed content.",
      "spec_checks": ["tabbed-content-serialization", "section-header-quality"],
      "canaries": [
        {
          "token": "CANARY-TAB-PYTHON-maple",
          "location": "Tab 1 (Python) - first tab, most likely to be seen",
          "points": 1,
          "notes": "In the first tab. Most agents should find this."
        },
        {
          "token": "CANARY-TAB-RUBY-cedar",
          "location": "Tab 4 (Ruby) - middle of the tab set",
          "points": 1,
          "notes": "In the fourth tab. Tests whether the agent reads beyond the first few tabs."
        },
        {
          "token": "CANARY-TAB-SWIFT-birch",
          "location": "Tab 8 (Swift) - last tab",
          "points": 1,
          "notes": "In the last tab. Tests whether the agent reads all serialized content."
        }
      ],
      "max_points": 3
    },
    "soft-404": {
      "description": "Tests whether agents recognize a soft 404 (HTTP 200 with error page content).",
      "spec_checks": ["http-status-codes"],
      "canaries": [
        {
          "token": "CANARY-SOFT404-ERRPAGE-phantom",
          "location": "In the body of the error page",
          "points": 0,
          "notes": "Present in the page but not scored. The real test is qualitative."
        }
      ],
      "qualitative": {
        "question": "Did the agent identify this as an error page rather than real documentation?",
        "correct_answer": "The page is a friendly 'page not found' error returning HTTP 200. The correct identification is: this is an error page, not real documentation.",
        "points": 1
      },
      "max_points": 1
    },
    "broken-code-fence": {
      "description": "Tests whether agents correctly parse content after an unclosed code fence in markdown.",
      "spec_checks": ["markdown-code-fence-validity"],
      "canaries": [
        {
          "token": "CANARY-FENCE-BEFORE-crystal",
          "location": "Before the unclosed code fence, in normal prose",
          "points": 1,
          "notes": "Before the broken fence. Should be found by all agents."
        },
        {
          "token": "CANARY-FENCE-AFTER-ember",
          "location": "After the unclosed code fence - appears inside a code block per CommonMark",
          "points": 1,
          "notes": "After the broken fence. Tests whether the agent can still find and interpret this content."
        }
      ],
      "qualitative": {
        "question": "Did the agent report that content after the fence appeared as code rather than prose?",
        "correct_answer": "The markdown has an unclosed ``` fence in the 'Chaining Filters' section. Per CommonMark, everything after it is inside a code block until EOF. The 'Advanced Filter Patterns' section and everything below appears as code, not prose.",
        "points": 1
      },
      "max_points": 3
    },
    "content-negotiation": {
      "description": "Tests whether agents request or receive markdown when available via content negotiation.",
      "spec_checks": ["content-negotiation", "markdown-url-support"],
      "canaries": [
        {
          "token": "CANARY-CONNEG-HTML-delta",
          "location": "In the HTML version of the page only",
          "points": 0,
          "notes": "Present only in HTML. Finding this means the agent received HTML."
        },
        {
          "token": "CANARY-CONNEG-MD-sigma",
          "location": "In the markdown version of the page only",
          "points": 1,
          "notes": "Present only in markdown. Finding this means the agent received or requested markdown."
        }
      ],
      "qualitative": {
        "question": "Which format did the agent report receiving?",
        "correct_answer": "Agents that send Accept: text/markdown (Claude Code, Cursor, OpenCode) should receive the markdown version with CANARY-CONNEG-MD-sigma. Others will receive HTML with CANARY-CONNEG-HTML-delta.",
        "points": 0
      },
      "max_points": 1
    },
    "cross-host-redirect": {
      "description": "Tests whether agents can follow HTTP redirects to a different hostname.",
      "spec_checks": ["redirect-behavior"],
      "canaries": [
        {
          "token": "CANARY-REDIRECT-TARGET-zenith",
          "location": "On the destination page at redirect-target.agentreadingtest.com",
          "points": 1,
          "notes": "Only findable if the agent followed the cross-host 301 redirect. Manually fetching the redirect target URL in a separate request does not count. The test measures whether the agent's web fetch pipeline automatically follows cross-host redirects."
        }
      ],
      "scoring_note": "This point is awarded only if the agent's web fetch pipeline automatically followed the 301 redirect to the different hostname. If the agent received a redirect response, then manually constructed and fetched the target URL in a second request, that demonstrates awareness of the redirect but does NOT earn the point. The test measures pipeline behavior, not agent reasoning.",
      "max_points": 1
    },
    "header-quality": {
      "description": "Tests whether agents can determine which platform a section describes when headers are generic.",
      "spec_checks": ["section-header-quality"],
      "canaries": [
        {
          "token": "CANARY-HEADER-AWS-peak",
          "location": "In the AWS deployment section (first set of Step 1/2/3)",
          "points": 1,
          "notes": "The headers say 'Step 1', 'Step 2', 'Step 3' without mentioning AWS."
        },
        {
          "token": "CANARY-HEADER-GCP-valley",
          "location": "In the GCP deployment section (second set of Step 1/2/3)",
          "points": 0,
          "notes": "Control canary in GCP section."
        },
        {
          "token": "CANARY-HEADER-AZURE-ridge",
          "location": "In the Azure deployment section (third set of Step 1/2/3)",
          "points": 0,
          "notes": "Control canary in Azure section."
        }
      ],
      "qualitative": {
        "question": "Which cloud platform does the section containing CANARY-HEADER-AWS-peak describe?",
        "correct_answer": "AWS. The section uses aws ecr, ECS, Fargate, and ARN references. But the headers just say 'Step 1', 'Step 2', 'Step 3' with no platform identifier.",
        "points": 1
      },
      "max_points": 2
    },
    "content-start": {
      "description": "Tests whether agents can find real content buried after navigation chrome that fills roughly the first 50% of the page.",
      "spec_checks": ["content-start-position"],
      "canaries": [
        {
          "token": "CANARY-CHROME-NAV-drift",
          "location": "In the navigation/sidebar chrome (first half of page)",
          "points": 0,
          "notes": "In the navigation section. Finding this is expected but not scored."
        },
        {
          "token": "CANARY-CONTENT-REAL-anchor",
          "location": "In the actual documentation content (second half of page)",
          "points": 1,
          "notes": "In the real content. Tests whether the agent reads past the navigation chrome."
        }
      ],
      "max_points": 1
    }
  }
}
