"""Eval runner — score agent performance against annotated incidents (Phase 4).

Runs the current agent against annotated context bundles and reports:
- Diagnosis accuracy (correct category / correct specific cause)
- Action recommendation accuracy
- False positive rate
- False negative rate
"""
from __future__ import annotations

import logging

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def run_eval(annotated_dir: str, output_path: str) -> None:
    """Evaluate the agent on annotated incidents and emit a results report.

    Placeholder: the scoring logic has not been written yet, so every call
    fails immediately and neither argument is read.

    Args:
        annotated_dir: Presumably the directory holding the annotated
            context bundles -- TODO confirm once implemented.
        output_path: Presumably the file the results report is written
            to -- TODO confirm once implemented.

    Raises:
        NotImplementedError: Always, until the runner is implemented.
    """
    raise NotImplementedError()


if __name__ == "__main__":
    # Script entry point: run the evaluation with the default input
    # directory and results path.
    run_eval(
        annotated_dir="eval/annotated",
        output_path="eval/results.json",
    )