{"schema_version":"lius-daoeval-evaluation-gate-v1","generated_at":"2026-06-11T16:26:53.924Z","status":"draft","official_promotion_allowed":false,"official_verified_runs":0,"draft_runs":4,"gates":[{"id":"core_run_complete","label":"Full core-v0.1 run","status":"draft_artifacts_present","passed_for_official":false,"public_evidence":"https://lius.cc/api/llm/leaderboard.json","requirement":"330 main leaderboard questions must have a published answer artifact and run summary for each candidate official row."},{"id":"l6_human_review","label":"L6 hallucination/trap human signoff","status":"review_packet_present_not_signed_off","passed_for_official":false,"public_evidence":"https://lius.cc/api/llm/leaderboard.json","public_summary":{"schemaVersion":"lius-l6-human-audit-packet-v1","status":"pending_human_review","auditDate":"2026-06-10","generatedAt":"2026-06-10T17:13:05.980Z","purpose":"L6 red-line human audit packet for UNVERIFIED_THREE_GRADER_CONSENSUS_DRAFT leaderboard runs.","packetPath":"tmp/lius-benchmark-runs/L6-human-audit-packet-2026-06-10.jsonl","summaryPath":"tmp/lius-benchmark-runs/L6-human-audit-summary-2026-06-10.json","records":320,"byModel":{"gpt-5.3-codex-spark":80,"gpt-5.4":80,"gpt-5.4-mini":80,"gpt-5.5":80},"byPriority":{"critical_redline":7,"grader_disagreement":86,"spot_check":214,"zero_or_partial":13},"byGrade":{"full_credit":268,"partial_credit":39,"zero":13},"flagged":{"consensus_redline_violation":0,"any_grader_redline_violation":1,"consensus_hallucinated":4,"any_grader_hallucinated":6,"consensus_refused":259,"any_grader_refused":266,"grader_disagreement":91},"l6ReviewPctTarget":100,"officialLeaderboardGate":"Do not convert draft scores to verified leaderboard entries until packet items are reviewed and signed off.","source":"tmp/lius-benchmark-runs/L6-human-audit-summary-2026-06-10.json"},"requirement":"Trap-sensitive and hallucination-sensitive L6 cases must be reviewed by a human before official promotion."},{"id":"cross_provider_comparison","label":"Cross-provider comparison","status":"waiting_for_usable_endpoints","passed_for_official":false,"public_evidence":"https://lius.cc/api/llm/runner-spec.json","requirement":"Claude/Gemini/Daoism-Qwen or equivalent comparison targets should be reported, but they are comparison evidence rather than the sole source of authority."},{"id":"artifact_integrity","label":"Public artifact integrity","status":"partial","passed_for_official":false,"public_evidence":"https://lius.cc/api/llm/daoeval-manifest.json","requirement":"Public benchmark, leaderboard, runner, and review summaries need stable URLs, counts, and integrity records or explicit redaction reasons."},{"id":"governance_policy","label":"Governance and correction policy","status":"published","passed_for_official":true,"public_evidence":"https://lius.cc/api/llm/governance.json","requirement":"Public policy must define maintainer, official promotion rules, correction intake, and version compatibility."}],"blockers":["core_run_complete","l6_human_review","cross_provider_comparison","artifact_integrity"],"unlock_sequence":["Publish or reference integrity records for current draft artifacts.","Complete L6 human signoff and publish a redacted signoff summary.","Run or document cross-provider comparison targets.","Promote only rows whose answer, grading, review, and integrity artifacts are public or explicitly redacted."],"artifacts":{"leaderboard_json":"https://lius.cc/api/llm/leaderboard.json","benchmark_jsonl":"https://lius.cc/api/llm/benchmark.jsonl?suite=core-v0.1","runner_spec":"https://lius.cc/api/llm/runner-spec.json","governance":"https://lius.cc/api/llm/governance.json","daoeval_manifest":"https://lius.cc/api/llm/daoeval-manifest.json","provenance":"https://lius.cc/api/llm/provenance.json","changelog":"https://lius.cc/api/llm/changelog.json","self_host_guide":"https://lius.cc/api/llm/self-host.json","offline_deterministic_scorer":"scripts/benchmark/score-lius-benchmark-offline.mjs"},"boundary":"This gate is a public machine-readable readiness record. It does not promote draft runs to official status."}