/*
 * Page stylesheet: walkthrough explorer (result grid, system tabs, timeline,
 * session / snapshot / QA detail panes), main results table, base page layout,
 * and the light/dark theme tokens.
 *
 * All colours that must follow the theme are read from custom properties
 * declared on :root (light) and :root[data-theme=dark] near the end of this
 * file. Custom properties resolve at computed-value time, so rules above the
 * declarations can use them safely.
 *
 * NOTE(review): the [data-astro-cid-aowcoo4s] attribute selectors look like
 * Astro scoped-style output, so this file is presumably a build artifact.
 * Confirm whether edits belong in the component sources instead.
 */

/* ---------- Walkthrough: section header ---------- */

.walkthrough {
  margin-top: 80px;
  padding-top: 48px;
  border-top: 1px solid var(--border);
}

.wt-head {
  margin-bottom: 32px;
}

.wt-eyebrow {
  display: block;
  text-transform: uppercase;
  letter-spacing: .12em;
  font-size: 12px;
  color: var(--muted);
  font-weight: 600;
  margin-bottom: 4px;
}

.wt-head h2 {
  margin: 0 0 8px;
  font-size: 28px;
  letter-spacing: -.01em;
}

.wt-head h2 code {
  background: var(--card-soft);
  padding: 2px 8px;
  border-radius: 6px;
  font-size: 22px;
}

.wt-meta {
  margin: 0;
  color: var(--muted);
  font-size: 14px;
}

.wt-meta code {
  background: var(--card-soft);
  padding: 1px 5px;
  border-radius: 4px;
}

/* ---------- Cards ---------- */

.card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 22px;
  margin-bottom: 24px;
}

.card-title {
  margin: 0 0 14px;
  font-size: 14px;
  font-weight: 700;
  letter-spacing: .06em;
  text-transform: uppercase;
  color: var(--muted);
}

/* Resets the uppercase/tracking of .card-title for an inline subtitle. */
.card-sub {
  font-weight: 500;
  text-transform: none;
  letter-spacing: 0;
  color: var(--muted);
}

.hint {
  margin: 10px 0 0;
  font-size: 12px;
  color: var(--muted);
}

/* ---------- Result grid (table of pass/fail cells) ---------- */

.grid-wrap {
  overflow-x: auto;
}

.result-grid {
  border-collapse: collapse;
  width: 100%;
  font-size: 14px;
  table-layout: fixed;
}

.result-grid th,
.result-grid td {
  padding: 6px;
  text-align: center;
}

.result-grid th:first-child,
.result-grid td:first-child {
  width: 70px;
}

/* Splits the width left after the 70px label column into 7 equal columns. */
/* NOTE(review): the divisor 7 is hard-coded; presumably the number of systems shown. */
.result-grid th.sys-col {
  border-bottom: 2px solid var(--border);
  padding-bottom: 8px;
  width: calc((100% - 70px) / 7);
}

.result-grid .sys-name {
  display: block;
  font-weight: 700;
  font-size: 13px;
  line-height: 1.2;
  overflow-wrap: break-word;
  word-break: break-word;
}

.result-grid .sys-llm {
  display: block;
  font-size: 10px;
  font-weight: 500;
  color: var(--muted);
  letter-spacing: 0;
  margin-top: 2px;
}

.result-grid .task-row-label {
  text-align: left;
  font-weight: 700;
  color: var(--fg);
  border-right: 1px solid var(--border);
  padding-right: 10px;
}

.gcell {
  width: 100%;
  height: 32px;
  border: 1px solid var(--border);
  border-radius: 6px;
  background: transparent;
  font-weight: 500;
  font-size: 15px;
  cursor: pointer;
  transition: background .1s ease, transform .05s ease;
}

.gcell:hover {
  transform: translateY(-1px);
}

.gcell.pass {
  color: var(--pass-fg);
  background: var(--diff-add-bg);
  border-color: var(--diff-add-bg);
}

.gcell.fail {
  color: var(--fail-fg);
  background: var(--diff-rem-bg);
  border-color: var(--diff-rem-bg);
}

.totals-row {
  font-weight: 700;
}

.totals-row td {
  padding-top: 8px;
  border-top: 1px solid var(--border);
}

/* ---------- System tabs ---------- */

.system-tabs {
  display: flex;
  gap: 6px;
  flex-wrap: wrap;
  margin-bottom: 16px;
  border-bottom: 1px solid var(--border);
  padding-bottom: 12px;
}

.sys-tab {
  border: 1px solid var(--border);
  background: var(--bg);
  color: var(--fg);
  padding: 8px 14px;
  border-radius: 999px;
  cursor: pointer;
  font-size: 13px;
  font-weight: 600;
  transition: background .1s ease, border-color .1s ease;
}

.sys-tab .tab-llm {
  display: block;
  font-size: 10px;
  font-weight: 500;
  color: var(--muted);
  margin-top: 1px;
}

.sys-tab:hover {
  border-color: var(--fg);
}

/* Active tab inverts the theme: --fg as background, --bg as text. */
.sys-tab.active {
  background: var(--fg);
  color: var(--bg);
  border-color: var(--fg);
}

/*
 * Dimmed subtitle on the active tab. This used to be a hard-coded translucent
 * white (#ffffffb3), which is unreadable in the dark theme where the active
 * tab background (--fg) is itself near-white. Inheriting the tab's inverted
 * text colour at 70% opacity gives the same result in the light theme and a
 * readable one in the dark theme.
 */
.sys-tab.active .tab-llm {
  color: inherit;
  opacity: .7;
}

/* ---------- Timeline: sparkline graph ---------- */

.timeline {
  margin-bottom: 22px;
}

.tl-graph {
  width: 100%;
  height: 110px;
  margin-bottom: 4px;
}

.sparkline {
  width: 100%;
  height: 110px;
  display: block;
  overflow: visible;
}

.sparkline .tick {
  stroke: var(--border);
  stroke-width: 1;
  stroke-dasharray: 2 3;
}

.sparkline .line.size {
  stroke: #2563eb;
  stroke-width: 1.8;
  fill: none;
}

.sparkline .line.gold {
  stroke: #d97706;
  stroke-width: 1.8;
  fill: none;
}

.sparkline .dot.size.active {
  fill: #2563eb;
  stroke: #fff;
  stroke-width: 1.5;
}

.sparkline .dot.gold.active {
  fill: #d97706;
  stroke: #fff;
  stroke-width: 1.5;
}

.sparkline .sel-line {
  stroke: #aaa;
  stroke-width: 1;
  stroke-dasharray: 2 2;
}

.sparkline .axis-label {
  font-size: 10px;
  fill: var(--muted);
}

.sparkline .sel-x {
  font-size: 11px;
  fill: var(--fg);
  font-weight: 600;
}

.timeline > .series-key {
  display: flex;
  gap: 18px;
  justify-content: center;
  font-size: 11px;
  color: var(--muted);
  margin-bottom: 4px;
}

/* Legend swatches reuse the two series stroke colours used above. */
.line-key {
  display: inline-block;
  width: 18px;
  height: 2px;
  background: #2563eb;
  vertical-align: middle;
  margin-right: 5px;
}

.line-key.gold {
  background: #d97706;
}

/* ---------- Timeline: cell row and probe markers ---------- */

/*
 * 17 equal columns with side padding of 50/1800 of the row width.
 * NOTE(review): presumably 17 is the session count and 50/1800 mirrors the
 * sparkline's horizontal inset so cells line up with graph points — confirm
 * against the code that draws the SVG.
 */
.tl-row {
  display: grid;
  grid-template-columns: repeat(17,1fr);
  gap: 0;
  padding-left: calc(100% * 50 / 1800);
  padding-right: calc(100% * 50 / 1800);
  margin-top: 22px;
}

.tl-cell {
  border: 1px solid var(--border);
  background: var(--bg);
  border-radius: 6px;
  margin: 0 2px;
  padding: 8px 0;
  cursor: pointer;
  font-size: 11px;
  color: var(--muted);
  font-weight: 600;
  transition: transform .05s ease, border-color .1s ease, background .1s ease;
}

.tl-cell.filler {
  background: var(--card-soft);
}

.tl-cell.evidence {
  background: #d5e8ee;
  color: #1a4c5b;
  border-color: #b6d8df;
}

.tl-cell.active {
  outline: 2px solid var(--fg);
  outline-offset: 1px;
}

.tl-cell:hover {
  transform: translateY(-1px);
}

.tl-probe-row {
  position: relative;
  height: 34px;
  margin-top: 4px;
}

/* Absolutely positioned; translate(-50%) centres the marker on its `left` value. */
.probe-marker {
  position: absolute;
  top: 0;
  transform: translate(-50%);
  background: transparent;
  border: 0;
  padding: 2px 6px 4px;
  cursor: pointer;
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 0;
  line-height: 1;
  border-radius: 6px;
}

.probe-marker .probe-arrow {
  font-size: 14px;
  line-height: 1;
}

.probe-marker.probe-before .probe-arrow {
  color: #c08a1a;
}

.probe-marker.probe-after .probe-arrow {
  color: #b86a18;
}

.probe-marker .probe-label {
  font-size: 10px;
  letter-spacing: .04em;
  color: var(--muted);
  margin-top: 2px;
  white-space: nowrap;
}

.probe-marker:hover {
  background: var(--card-soft);
}

.probe-marker.active .probe-arrow {
  transform: scale(1.25);
}

.probe-marker.active .probe-label {
  color: var(--fg);
  font-weight: 700;
}

.tl-legend {
  display: flex;
  flex-wrap: wrap;
  gap: 14px;
  margin-top: 4px;
  font-size: 11px;
  color: var(--muted);
}

.tl-legend .dot {
  display: inline-block;
  width: 10px;
  height: 10px;
  border-radius: 3px;
  vertical-align: middle;
  margin-right: 5px;
  border: 1px solid var(--border);
}

.tl-legend .dot.filler {
  background: var(--card-soft);
}

.tl-legend .dot.evidence {
  background: #d5e8ee;
}

.tl-legend .probe-mini {
  color: #b86a18;
  margin-right: 4px;
}

/* ---------- Detail pane: shared layout and headers ---------- */

.detail-pane {
  min-height: 280px;
}

/* minmax(0, …) lets the columns shrink below their content's min-content width. */
.detail-grid {
  display: grid;
  grid-template-columns: minmax(0,1fr) minmax(0,1.1fr);
  gap: 18px;
}

@media (max-width: 760px) {
  .detail-grid {
    grid-template-columns: 1fr;
  }
}

/* NOTE(review): `.snap-head` here looks like it may have been meant as `.snap-pane` — confirm. */
.sess-pane,
.snap-head,
.qa-pane {
  font-size: 14px;
}

.sess-head,
.snap-head,
.qa-head {
  display: flex;
  justify-content: space-between;
  align-items: baseline;
  margin-bottom: 10px;
  gap: 12px;
}

.sess-head h4,
.snap-head h4,
.qa-head h4 {
  margin: 0;
  font-size: 14px;
  font-weight: 700;
  letter-spacing: .04em;
  text-transform: uppercase;
  color: var(--muted);
}

.snap-head h4 .snap-sub,
.qa-head h4 .qa-sub {
  font-weight: 500;
  text-transform: none;
  letter-spacing: 0;
  color: var(--fg);
  margin-left: 8px;
}

.sess-ts,
.qa-ts {
  font-size: 12px;
  color: var(--muted);
}

.kind-pill {
  display: inline-block;
  font-size: 10px;
  padding: 2px 8px;
  border-radius: 99px;
  background: #d5e8ee;
  color: #1a4c5b;
  margin-left: 8px;
  text-transform: none;
  letter-spacing: 0;
  font-weight: 600;
}

.kind-pill.filler {
  background: var(--card-soft);
  color: var(--muted);
}

/* ---------- Session pane: gold facts and conversation turns ---------- */

.gold-facts {
  background: var(--card-soft);
  border: 1px solid var(--border);
  border-radius: 8px;
  padding: 10px 14px;
  margin-bottom: 12px;
}

.gold-facts h5 {
  margin: 0 0 6px;
  font-size: 11px;
  font-weight: 700;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: .06em;
}

.gold-facts ul {
  list-style: none;
  padding: 0;
  margin: 0;
}

.gold-facts li {
  font-size: 13px;
  padding: 2px 0;
}

.gold-facts code {
  background: var(--code-bg);
  padding: 1px 5px;
  border-radius: 4px;
  font-size: 12px;
}

.gold-facts em {
  color: var(--muted);
  font-style: italic;
  font-size: 12px;
}

.sess-conv {
  max-height: 520px;
  overflow-y: auto;
  padding-right: 6px;
}

.turn {
  display: grid;
  grid-template-columns: 28px 1fr;
  gap: 10px;
  margin: 10px 0;
  align-items: start;
}

.turn .avatar {
  width: 28px;
  height: 28px;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  background: #fff;
  border: 1px solid var(--border);
  flex-shrink: 0;
}

.turn .avatar-icon {
  width: 18px;
  height: 18px;
}

/* min-width: 0 lets the 1fr grid column shrink so long content can wrap. */
.turn .msg {
  min-width: 0;
}

.turn .role {
  font-size: 11px;
  font-weight: 700;
  color: var(--muted);
  margin-bottom: 4px;
}

/* Same selector as above; this later rule wins, so the default role colour is blue. */
.turn .role {
  color: #185fa5;
}

/* Equal specificity to `.turn .role` but declared later, so assistant roles are muted. */
.turn-assistant .role {
  color: var(--muted);
}

.turn .bubble {
  background: var(--bubble-user);
  padding: 10px 14px;
  border-radius: 4px 14px 14px;
  font-size: 13px;
  line-height: 1.55;
  white-space: pre-wrap;
  word-break: break-word;
}

.turn-assistant .bubble {
  background: var(--bubble-assistant);
}

.detail-qa {
  margin-top: 24px;
  padding-top: 18px;
  border-top: 1px solid var(--border);
}

/* ---------- Snapshot pane ---------- */

.snap-controls {
  display: inline-flex;
  gap: 4px;
}

.seg {
  border: 1px solid var(--border);
  background: var(--bg);
  color: var(--fg);
  padding: 4px 10px;
  border-radius: 6px;
  font-size: 12px;
  cursor: pointer;
  font-weight: 600;
}

.seg.active {
  background: var(--fg);
  color: var(--bg);
  border-color: var(--fg);
}

.snap-summary {
  display: flex;
  gap: 8px;
  margin-bottom: 8px;
  flex-wrap: wrap;
}

.badge {
  font-size: 11px;
  padding: 2px 8px;
  border-radius: 99px;
  background: var(--card-soft);
  color: var(--muted);
  font-weight: 600;
}

.badge.added-badge {
  background: var(--diff-add-bg);
  color: var(--pass-fg);
}

.badge.removed-badge {
  background: var(--diff-rem-bg);
  color: var(--fail-fg);
}

.snap-body {
  max-height: 520px;
  overflow-y: auto;
}

.snap-block {
  margin: 0;
  padding: 10px 12px;
  background: var(--stripe);
  border: 1px solid var(--border);
  border-radius: 8px;
  font-size: 12px;
  line-height: 1.55;
  font-family: ui-monospace,SFMono-Regular,Menlo,monospace;
  white-space: pre-wrap;
  word-break: break-word;
}

.snap-block .ln {
  display: block;
  padding: 1px 4px;
  border-radius: 3px;
}

.snap-block .ln.added {
  background: var(--diff-add-bg);
  color: var(--diff-add-fg);
}

.snap-block .ln.removed {
  background: var(--diff-rem-bg);
  color: var(--diff-rem-fg);
}

/* Category chips: fixed colours with white text in both themes. */
.chip-ER {
  background: #6b7280;
  color: #fff;
}

.chip-Agg {
  background: #0e9594;
  color: #fff;
}

.chip-Tr {
  background: #185fa5;
  color: #fff;
}

.chip-Del {
  background: #b8860b;
  color: #fff;
}

.chip-Cas {
  background: #c23535;
  color: #fff;
}

.chip-Abs {
  background: #7e22ce;
  color: #fff;
}

.ln-chip {
  display: inline-block;
  font-size: 9px;
  font-weight: 800;
  letter-spacing: .04em;
  padding: 1px 6px;
  border-radius: 4px;
  margin-right: 5px;
  line-height: 1.4;
  vertical-align: 1px;
  text-transform: uppercase;
}

.snap-empty,
.snap-loading,
.snap-error {
  padding: 12px;
  background: var(--stripe);
  border: 1px dashed var(--border);
  border-radius: 8px;
  font-size: 13px;
  color: var(--muted);
  text-align: center;
}

.snap-meta {
  margin-top: 6px;
  font-size: 11px;
  color: var(--muted);
  text-align: right;
}

/* ---------- QA pane ---------- */

/* Repeats the 14px font size already set on .qa-pane in the shared detail-pane rules. */
.qa-pane {
  font-size: 14px;
}

.qa-list {
  list-style: none;
  padding: 0;
  margin: 0;
}

.qa-row {
  border: 1px solid var(--border);
  border-radius: 10px;
  margin-bottom: 10px;
  overflow: hidden;
  background: var(--card);
}

.qa-row.pass {
  border-left: 3px solid var(--pass-fg);
}

.qa-row.fail {
  border-left: 3px solid var(--fail-fg);
}

/* Strips default button chrome (border, background, font) so it reads as a row. */
.qa-summary {
  width: 100%;
  background: transparent;
  border: 0;
  cursor: pointer;
  padding: 12px 14px;
  text-align: left;
  font: inherit;
  color: inherit;
  display: block;
}

.qa-summary:hover {
  background: var(--stripe);
}

.qa-line {
  display: flex;
  gap: 6px;
  margin: 2px 0;
  font-size: 13px;
}

.qa-line.qa-q {
  font-weight: 700;
  align-items: center;
}

.qa-tag {
  display: inline-block;
  font-size: 13px;
  padding: 3px 10px;
  border-radius: 5px;
  font-weight: 800;
  letter-spacing: .04em;
  text-transform: uppercase;
  margin-right: 8px;
}

.qa-line.qa-stages {
  display: flex;
  align-items: center;
  flex-wrap: wrap;
  gap: 4px;
  margin: 6px 0 4px;
}

.qa-stages .stage {
  font-size: 11px;
  padding: 2px 8px;
  border-radius: 99px;
  font-weight: 600;
  letter-spacing: .02em;
}

.qa-stages .stage.pass {
  background: var(--diff-add-bg);
  color: var(--pass-fg);
}

.qa-stages .stage.fail {
  background: var(--diff-rem-bg);
  color: var(--fail-fg);
  font-weight: 800;
}

.qa-stages .stage.dim {
  background: var(--card-soft);
  color: #aaa;
}

.qa-stages .stage-arrow {
  color: var(--muted);
  font-size: 11px;
  margin: 0 2px;
}

.qa-stage-note {
  font-size: 11px;
  font-style: italic;
  color: var(--muted);
  margin: 2px 0 6px;
  padding-left: 2px;
}

.qa-phase-note {
  font-size: 11px;
  color: var(--muted);
  font-style: italic;
  font-weight: 500;
}

.qa-label {
  color: var(--muted);
  font-weight: 600;
  min-width: 50px;
}

.qa-a-text,
.qa-g-text {
  white-space: pre-wrap;
  word-break: break-word;
}

.qa-expanded {
  padding: 0 14px 14px;
  border-top: 1px dashed var(--border);
  margin-top: 4px;
}

.qa-reason {
  margin: 10px 0;
  font-size: 13px;
  color: var(--fg);
}

.qa-ctx-toggle {
  border: 1px solid var(--border);
  background: var(--bg);
  color: var(--fg);
  padding: 5px 12px;
  border-radius: 6px;
  cursor: pointer;
  font-size: 12px;
  font-weight: 600;
}

.qa-ctx-body {
  margin-top: 8px;
  max-height: 360px;
  overflow-y: auto;
}

.qa-ctx-pre {
  margin: 0;
  padding: 10px;
  background: var(--stripe);
  border: 1px solid var(--border);
  border-radius: 8px;
  font-family: ui-monospace,SFMono-Regular,Menlo,monospace;
  font-size: 11px;
  line-height: 1.5;
  white-space: pre-wrap;
  word-break: break-word;
}

.qa-ctx-empty {
  margin: 10px 0;
  padding: 10px;
  background: var(--stripe);
  border: 1px dashed var(--border);
  border-radius: 8px;
  font-size: 12px;
  color: var(--muted);
  text-align: center;
}

/* ---------- Main results table (scoped via data-astro-cid-aowcoo4s) ---------- */

.main-results[data-astro-cid-aowcoo4s] {
  margin-top: 64px;
  padding-top: 40px;
  border-top: 1px solid var(--border);
}

.mr-head[data-astro-cid-aowcoo4s] {
  margin-bottom: 18px;
}

.mr-eyebrow[data-astro-cid-aowcoo4s] {
  display: block;
  text-transform: uppercase;
  letter-spacing: .12em;
  font-size: 12px;
  color: var(--muted);
  font-weight: 600;
  margin-bottom: 4px;
}

.mr-head[data-astro-cid-aowcoo4s] h2[data-astro-cid-aowcoo4s] {
  margin: 0 0 6px;
  font-size: 22px;
  letter-spacing: -.01em;
}

.mr-meta[data-astro-cid-aowcoo4s] {
  margin: 0;
  color: var(--muted);
  font-size: 13px;
  line-height: 1.55;
}

.mr-meta[data-astro-cid-aowcoo4s] code[data-astro-cid-aowcoo4s] {
  background: var(--code-bg);
  padding: 1px 5px;
  border-radius: 4px;
}

.mr-wrap[data-astro-cid-aowcoo4s] {
  overflow-x: auto;
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 10px;
  padding: 14px 18px;
}

.mr-table[data-astro-cid-aowcoo4s] {
  width: 100%;
  border-collapse: separate;
  border-spacing: 0;
  font-size: 13px;
}

.mr-table[data-astro-cid-aowcoo4s] th[data-astro-cid-aowcoo4s],
.mr-table[data-astro-cid-aowcoo4s] td[data-astro-cid-aowcoo4s] {
  text-align: center;
  padding: 5px 8px;
  vertical-align: middle;
}

.mr-table[data-astro-cid-aowcoo4s] thead[data-astro-cid-aowcoo4s] th[data-astro-cid-aowcoo4s] {
  font-size: 12px;
  text-transform: uppercase;
  letter-spacing: .04em;
  color: var(--muted);
  padding-bottom: 8px;
}

.mr-table[data-astro-cid-aowcoo4s] thead[data-astro-cid-aowcoo4s] th[data-astro-cid-aowcoo4s][rowspan="2"] {
  border-bottom: 1px solid var(--border);
  vertical-align: middle;
}

.mr-table[data-astro-cid-aowcoo4s] thead[data-astro-cid-aowcoo4s] th[data-astro-cid-aowcoo4s].cost-group {
  border-bottom: 1px solid var(--border);
  padding-bottom: 4px;
}

.mr-table[data-astro-cid-aowcoo4s] thead[data-astro-cid-aowcoo4s] tr[data-astro-cid-aowcoo4s].sub-head th[data-astro-cid-aowcoo4s] {
  border-bottom: 1px solid var(--border);
  padding-bottom: 4px;
  padding-top: 2px;
  font-size: 11px;
  text-transform: none;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .group-row[data-astro-cid-aowcoo4s] td[data-astro-cid-aowcoo4s] {
  text-align: left;
  padding: 12px 4px 4px;
  font-size: 11px;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: .06em;
  border: 0;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s] .sys-name[data-astro-cid-aowcoo4s] {
  text-align: left;
  font-weight: 600;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s] .sys-llm[data-astro-cid-aowcoo4s] {
  display: block;
  font-size: 11px;
  font-weight: 500;
  color: var(--muted);
}

/*
 * Highlighted row. The background comes from the themed --featured-bg token
 * instead of a hard-coded #fff8df, because the row's text uses --fg, which is
 * near-white in the dark theme and was unreadable on the pale yellow.
 */
.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s].featured {
  background: var(--featured-bg);
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s].featured .sys-name[data-astro-cid-aowcoo4s] .star[data-astro-cid-aowcoo4s] {
  color: #b48a1b;
  margin-right: 4px;
}

.mr-table[data-astro-cid-aowcoo4s] .col-overall[data-astro-cid-aowcoo4s] {
  background: var(--overall-col);
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .overall[data-astro-cid-aowcoo4s] {
  font-weight: 500;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s] .success[data-astro-cid-aowcoo4s] {
  color: var(--pass-fg);
  font-weight: 700;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .cost-col[data-astro-cid-aowcoo4s] {
  font-variant-numeric: tabular-nums;
  font-size: 12px;
  color: var(--muted);
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s].featured .cost-col[data-astro-cid-aowcoo4s] {
  color: var(--fg);
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .row[data-astro-cid-aowcoo4s] .best[data-astro-cid-aowcoo4s] strong[data-astro-cid-aowcoo4s] {
  font-weight: 800;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .avg-row[data-astro-cid-aowcoo4s] {
  border-top: 1px solid var(--border);
  color: var(--muted);
  font-style: italic;
  font-size: 12px;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .avg-row[data-astro-cid-aowcoo4s] td[data-astro-cid-aowcoo4s].danger {
  color: var(--fail-fg);
  font-weight: 700;
}

.mr-table[data-astro-cid-aowcoo4s] tbody[data-astro-cid-aowcoo4s] .avg-row[data-astro-cid-aowcoo4s] th[data-astro-cid-aowcoo4s] {
  text-align: left;
  font-weight: 600;
}

/* ---------- Base theme tokens (light) and page layout ---------- */

/* NOTE(review): --accent is declared but not referenced anywhere in this file. */
:root {
  --bg: #ffffff;
  --fg: #1a1a1a;
  --muted: #5b5b5b;
  --accent: #1a1a1a;
  --border: #e5e3dc;
  --card: #ffffff;
  /* Tells the browser to render native UI (scrollbars, default controls) for a light page. */
  color-scheme: light;
}

* {
  box-sizing: border-box;
}

html,
body {
  margin: 0;
  padding: 0;
  background: var(--bg);
  color: var(--fg);
  font-family: ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,sans-serif;
  font-size: 17px;
  line-height: 1.6;
  -webkit-font-smoothing: antialiased;
  text-rendering: optimizeLegibility;
}

main {
  max-width: 1080px;
  margin: 0 auto;
  padding: 72px 28px 96px;
}

.overview {
  display: grid;
  grid-template-columns: minmax(0,1.1fr) minmax(0,.9fr);
  gap: 32px;
  align-items: center;
  margin: 0 0 32px;
}

@media (max-width: 760px) {
  .overview {
    grid-template-columns: 1fr;
    gap: 20px;
  }
}

.taxonomy {
  margin: 0;
  padding: 0;
  text-align: center;
}

.taxonomy img {
  display: block;
  width: 100%;
  max-width: 380px;
  height: auto;
  margin: 0 auto;
}

.hero {
  text-align: center;
  margin-bottom: 56px;
}

.hero h1 {
  margin: 0 0 28px;
  line-height: 1.05;
  letter-spacing: -.02em;
  font-weight: 800;
}

.hero-acronym {
  display: block;
  font-size: clamp(56px,9vw,96px);
  letter-spacing: -.04em;
}

.hero-fullname {
  display: block;
  font-size: clamp(18px,2.6vw,24px);
  font-weight: 500;
  color: var(--muted);
  margin-top: 8px;
  letter-spacing: 0;
}

.links {
  display: flex;
  gap: 12px;
  justify-content: center;
  flex-wrap: wrap;
}

.btn {
  display: inline-flex;
  align-items: center;
  gap: 8px;
  padding: 10px 18px;
  border: 1px solid var(--fg);
  border-radius: 999px;
  background: var(--fg);
  color: var(--bg);
  text-decoration: none;
  font-weight: 600;
  font-size: 15px;
  transition: transform .08s ease, background .15s ease, color .15s ease;
}

.btn:hover {
  background: var(--bg);
  color: var(--fg);
  transform: translateY(-1px);
}

.btn:focus-visible {
  outline: 2px solid var(--fg);
  outline-offset: 3px;
}

.icon {
  width: 18px;
  height: 18px;
  flex-shrink: 0;
}

/*
 * Figures keep a fixed white background in both themes.
 * NOTE(review): presumably the images are drawn for a white canvas. In the
 * dark theme the figcaption (--muted) also sits on this white, which looks
 * low-contrast — confirm whether that is acceptable.
 */
.fig {
  margin: 0 0 48px;
  padding: 24px;
  background: #fff;
  border: 1px solid var(--border);
  border-radius: 12px;
}

/* Second .taxonomy rule; its padding: 8px overrides the earlier padding: 0. */
.taxonomy {
  background: #fff;
  border-radius: 8px;
  padding: 8px;
}

.fig img {
  display: block;
  width: 100%;
  height: auto;
}

.fig figcaption {
  margin-top: 16px;
  font-size: 14px;
  line-height: 1.55;
  color: var(--muted);
}

.summary p {
  margin: 0 0 18px;
  font-size: 17px;
  color: var(--fg);
}

.summary p:last-child {
  margin-bottom: 0;
}

.foot {
  margin-top: 64px;
  padding-top: 24px;
  border-top: 1px solid var(--border);
  text-align: center;
  font-size: 13px;
  color: var(--muted);
}

.foot p {
  margin: 0;
}

@media (max-width: 560px) {
  main {
    padding: 48px 20px 64px;
  }

  .hero {
    margin-bottom: 40px;
  }

  .fig {
    padding: 14px;
  }
}

/* ---------- Component theme tokens ---------- */

/* NOTE(review): --pass-bg and --fail-bg are declared but not referenced in this file. */
:root {
  --card-soft: #f7f7f1;
  --bubble-user: #eef4fa;
  --bubble-assistant: #f3f3ee;
  --diff-add-bg: #e4f5ea;
  --diff-add-fg: #155a35;
  --diff-rem-bg: #fbeaea;
  --diff-rem-fg: #8b2828;
  --pass-bg: #e4f5ea;
  --pass-fg: #1a7c4a;
  --fail-bg: #fbeaea;
  --fail-fg: #b94747;
  --overall-col: #f7f7f1;
  --code-bg: #f3f3ed;
  --stripe: #fbfbf6;
  /* Featured results row; same value the row previously hard-coded. */
  --featured-bg: #fff8df;
}

/* Dark theme: active when the root element carries data-theme="dark". */
:root[data-theme=dark] {
  --bg: #0f0f0f;
  --fg: #ececea;
  --muted: #9c9a90;
  --border: #2d2d2a;
  --card: #181818;
  --card-soft: #1f1e1c;
  --bubble-user: #1c2c3c;
  --bubble-assistant: #2a2a26;
  --diff-add-bg: #15321f;
  --diff-add-fg: #9adfb4;
  --diff-rem-bg: #3a1d1d;
  --diff-rem-fg: #f0a0a0;
  --pass-bg: #15321f;
  --pass-fg: #9adfb4;
  --fail-bg: #3a1d1d;
  --fail-fg: #f0a0a0;
  --overall-col: #1f1e1c;
  --code-bg: #2a2a28;
  --stripe: #181818;
  /* Dark amber so the featured row's --fg text stays readable. */
  /* NOTE(review): exact shade is a design choice — confirm with design. */
  --featured-bg: #33290f;
  /* Render native scrollbars and default control colours for a dark page. */
  color-scheme: dark;
}

/* ---------- Theme toggle button ---------- */

.theme-toggle {
  position: fixed;
  top: 18px;
  right: 18px;
  z-index: 50;
  width: 36px;
  height: 36px;
  border-radius: 999px;
  border: 1px solid var(--border);
  background: var(--card);
  color: var(--fg);
  cursor: pointer;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  font-size: 16px;
  transition: background .15s ease, transform .05s ease;
}

.theme-toggle:hover {
  transform: translateY(-1px);
}

/* Light theme shows the moon icon; dark theme swaps to the sun. */
.theme-toggle .toggle-sun {
  display: none;
}

.theme-toggle .toggle-moon {
  display: inline;
}

:root[data-theme=dark] .theme-toggle .toggle-sun {
  display: inline;
}

:root[data-theme=dark] .theme-toggle .toggle-moon {
  display: none;
}
