diff --git a/.claude/commands/ai-review-local.md b/.claude/commands/ai-review-local.md index bde1b5a1..ad05587f 100644 --- a/.claude/commands/ai-review-local.md +++ b/.claude/commands/ai-review-local.md @@ -1,6 +1,6 @@ --- description: Run AI code review locally using OpenAI API before opening a PR -argument-hint: "[--full-registry] [--model <model>] [--dry-run]" +argument-hint: "[--context minimal|standard|deep] [--include-files <files>] [--token-budget <n>] [--force-fresh] [--full-registry] [--model <model>] [--dry-run]" --- # Local AI Code Review @@ -12,6 +12,15 @@ pre-PR use. Designed for iterative review/revision cycles before submitting a PR ## Arguments `$ARGUMENTS` may contain optional flags: +- `--context {minimal,standard,deep}`: Context depth (default: `standard`) + - `minimal`: Diff only (original behavior) + - `standard`: Diff + full contents of changed `diff_diff/` source files + - `deep`: Standard + import-graph expansion (files imported by changed files) +- `--include-files <files>`: Extra files to include as read-only context + (filenames resolve under `diff_diff/`, or use paths relative to repo root) +- `--token-budget <n>`: Max estimated input tokens before dropping import-context + files (default: 200000). Changed source files are always included regardless of budget. +- `--force-fresh`: Skip delta-diff mode, run a full fresh review even if previous state exists - `--full-registry`: Include the entire REGISTRY.md instead of selective sections - `--model <model>`: Override the OpenAI model (default: `gpt-5.4`) - `--dry-run`: Print the compiled prompt without calling the API @@ -31,7 +40,8 @@ before any data is sent externally. ### Step 1: Parse Arguments Parse `$ARGUMENTS` for the optional flags listed above. All flags are optional — -the default behavior (selective registry, gpt-5.4, live API call) requires no arguments. +the default behavior (standard context, selective registry, gpt-5.4, live API call) +requires no arguments.
### Step 2: Validate Prerequisites @@ -146,20 +156,149 @@ Options: If user selects abort, clean up temp files and stop. If continue, proceed. +### Step 3c: Full-File Secret Scan (for standard/deep context) + +When `--context` is not `minimal`, full source files will be uploaded to OpenAI. The diff-based scan in Step 3b only covers changed lines, so scan the full content of every file that will be transmitted: + +```bash +# Category 1: Changed diff_diff/ source files (standard/deep) +upload_scan_files="" +if [ "$context_level" != "minimal" ]; then + upload_scan_files=$(git diff --name-only "${comparison_ref}...HEAD" | grep "^diff_diff/.*\.py$" || true) +fi + +# Category 2: All diff_diff/*.py for deep mode (conservative superset of import expansion) +if [ "$context_level" = "deep" ]; then + upload_scan_files=$(find diff_diff -name "*.py" -not -path "*/__pycache__/*" 2>/dev/null | sort -u) +fi + +# Category 3: --include-files (mirror script's path confinement) +if [ -n "$include_files" ]; then + repo_root_real=$(pwd -P) + for f in $(echo "$include_files" | tr ',' ' '); do + # Reject absolute paths (mirrors script's os.path.isabs check) + case "$f" in /*) continue ;; esac + # Mirror script's branching: only bare filenames (no /) resolve under diff_diff/ + # Slash-containing inputs resolve relative to repo root only + case "$f" in + */*) if [ -f "$f" ]; then candidate="$f"; else continue; fi ;; + *) if [ -f "diff_diff/$f" ]; then candidate="diff_diff/$f"; else continue; fi ;; + esac + # Verify within repo root (mirrors script's realpath containment) + real_candidate=$(cd "$(dirname "$candidate")" && pwd -P)/$(basename "$candidate") + case "$real_candidate" in "$repo_root_real"/*) upload_scan_files="$upload_scan_files $candidate" ;; esac + done +fi + +# Scan using same canonical content patterns from Step 3b (never echo secret values) +if [ -n "$upload_scan_files" ]; then + secret_hits=$(grep -rlE 
"(AKIA[A-Z0-9]{16}|ghp_[a-zA-Z0-9]{36}|sk-[a-zA-Z0-9]{48}|gho_[a-zA-Z0-9]{36}|[Aa][Pp][Ii][_-]?[Kk][Ee][Yy][[:space:]]*[=:]|[Ss][Ee][Cc][Rr][Ee][Tt][_-]?[Kk][Ee][Yy][[:space:]]*[=:]|[Pp][Aa][Ss][Ss][Ww][Oo][Rr][Dd][[:space:]]*[=:]|[Pp][Rr][Ii][Vv][Aa][Tt][Ee][_-]?[Kk][Ee][Yy]|[Bb][Ee][Aa][Rr][Ee][Rr][[:space:]]+[a-zA-Z0-9_-]+|[Tt][Oo][Kk][Ee][Nn][[:space:]]*[=:])" $upload_scan_files 2>/dev/null || true) + sensitive_hits=$(echo "$upload_scan_files" | tr ' ' '\n' | grep -iE "(\.env|credentials|secret|\.pem|\.key|\.p12|\.pfx|id_rsa|id_ed25519)$" || true) +fi +``` + +If either `secret_hits` or `sensitive_hits` is non-empty, use AskUserQuestion: +``` +Warning: Potential secrets detected in source files that would be uploaded to OpenAI +(full-file scan, not just diff hunks): +- + +Options: +1. Abort — review and remove secrets before retrying +2. Continue — I confirm these are not real secrets +``` + +If user selects abort, clean up temp files and stop. If continue, proceed. + ### Step 4: Handle Re-Review State -If `.claude/reviews/local-review-latest.md` exists, preserve it for re-review context: +Check for existing review state and generate delta diff if applicable. **Validate that the +stored state matches the current branch and base** before reusing — stale state from a +different branch can contaminate re-review context. + +**If `--force-fresh` is set**, delete prior state but still seed a new baseline: ```bash -if [ -f .claude/reviews/local-review-latest.md ]; then - cp .claude/reviews/local-review-latest.md .claude/reviews/local-review-previous.md - echo "Previous review preserved for re-review context." +rm -f .claude/reviews/review-state.json +rm -f .claude/reviews/local-review-latest.md +rm -f .claude/reviews/local-review-previous.md +echo "Force-fresh: deleted all prior review state. Fresh review will seed new baseline." 
+# Do NOT pass --previous-review or --delta-diff/--delta-changed-files +# DO still pass --review-state, --commit-sha, --base-ref so the fresh run seeds a new baseline +``` + +**Otherwise**, validate existing review state using the Python validator (single-point +validation that checks schema version, branch/base match, and required finding fields): +```bash +if [ -f .claude/reviews/review-state.json ]; then + # Use the script's validate_review_state() for comprehensive validation + # Returns: last_reviewed_commit and is_valid flag + validation_result=$(python3 -c " +import importlib.util, sys +spec = importlib.util.spec_from_file_location('openai_review', '.claude/scripts/openai_review.py') +mod = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mod) +_, _, commit, valid = mod.validate_review_state( + '.claude/reviews/review-state.json', '$branch_name', '$comparison_ref' +) +print(f'{commit}|{valid}') +" 2>/dev/null || echo "|False") + last_reviewed_commit=$(echo "$validation_result" | cut -d'|' -f1) + state_valid=$(echo "$validation_result" | cut -d'|' -f2) + + if [ "$state_valid" != "True" ]; then + echo "Warning: review-state.json validation failed. Running fresh review." + rm -f .claude/reviews/review-state.json + rm -f .claude/reviews/local-review-previous.md + last_reviewed_commit="" + fi + + # Only generate delta when state is valid AND commit is an ancestor of HEAD + if [ -n "$last_reviewed_commit" ] && git merge-base --is-ancestor "$last_reviewed_commit" HEAD 2>/dev/null; then + # SHA is a valid ancestor — generate delta diff + git diff --unified=5 "${last_reviewed_commit}...HEAD" > /tmp/ai-review-delta-diff.patch + git diff --name-status "${last_reviewed_commit}...HEAD" > /tmp/ai-review-delta-files.txt + + # Check if delta is empty + if [ ! -s /tmp/ai-review-delta-diff.patch ]; then + echo "No changes since last review (commit ${last_reviewed_commit:0:7}). Use --force-fresh to re-review." 
+ rm -f /tmp/ai-review-delta-diff.patch /tmp/ai-review-delta-files.txt + rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt + # Stop here + fi + # State validated and delta generated — preserve previous review for re-review context + if [ -f .claude/reviews/local-review-latest.md ]; then + cp .claude/reviews/local-review-latest.md .claude/reviews/local-review-previous.md + echo "Previous review preserved for re-review context." + fi + else + echo "Warning: Previous review commit is not an ancestor of HEAD (likely rebase). Running fresh review." + rm -f .claude/reviews/review-state.json + rm -f .claude/reviews/local-review-previous.md + fi +fi + +# Final cleanup: if delta mode was NOT activated (no delta files generated), +# ensure no stale previous-review file can leak into the fresh review. +# This covers the case where review-state.json is missing entirely but +# local-review-previous.md lingers from a prior run. +if [ ! -f /tmp/ai-review-delta-diff.patch ]; then + rm -f .claude/reviews/local-review-previous.md fi ``` +**Important**: Previous review text is ONLY preserved when delta mode is active (delta files +generated and state validated). When delta mode is NOT active — whether because +`review-state.json` is missing, branch/base mismatch, non-ancestor, or `--force-fresh` — +the previous review file is deleted to prevent stale findings from leaking into a fresh +review via `--previous-review`. + ### Step 5: Run the Review Script -Build and run the command. Include `--previous-review` only if the previous review file -exists from Step 4. +Build and run the command. 
Include optional arguments only when their conditions are met: - `--previous-review`: only if `.claude/reviews/local-review-previous.md` exists AND `--force-fresh` was NOT set - `--delta-diff` and `--delta-changed-files`: only if delta files were generated in Step 4 - `--review-state`, `--commit-sha`, `--base-ref`: always include (even with `--force-fresh`, to seed a new baseline) - `--context`, `--include-files`, `--token-budget`: pass through from parsed arguments ```bash python3 .claude/scripts/openai_review.py \ @@ -169,14 +308,26 @@ python3 .claude/scripts/openai_review.py \ --changed-files /tmp/ai-review-files.txt \ --output .claude/reviews/local-review-latest.md \ --branch-info "$branch_name" \ +    --repo-root "$(pwd)" \ +    --context "$context_level" \ +    --review-state .claude/reviews/review-state.json \ +    --commit-sha "$(git rev-parse HEAD)" \ +    --base-ref "$comparison_ref" \ [--previous-review .claude/reviews/local-review-previous.md] \ +    [--delta-diff /tmp/ai-review-delta-diff.patch] \ +    [--delta-changed-files /tmp/ai-review-delta-files.txt] \ +    [--include-files "$include_files"] \ +    [--token-budget "$token_budget"] \ [--full-registry] \ [--model <model>] \ [--dry-run] ``` -If `--dry-run`: display the prompt output and stop. Report the estimated token count -and model that would be used. +Note: `--force-fresh` is a skill-only flag — it controls whether delta diffs are +generated in Step 4 and is NOT passed to the script. + +If `--dry-run`: display the prompt output and stop. Report the estimated token count, +cost estimate, and model that would be used. If the script exits non-zero, display the error output and stop. @@ -191,6 +342,10 @@ Parse the review output to extract ALL findings. For each finding, capture: - Section (Methodology, Code Quality, etc.) - One-line summary of the issue +Note: The script handles writing `review-state.json` automatically (finding tracking +across rounds).
The skill does NOT need to write JSON — just pass `--review-state` +and `--commit-sha` to the script. + Present a **findings summary** showing every finding, grouped by severity: ``` @@ -243,13 +398,21 @@ directly — for P0/P1 issues use `EnterPlanMode` for a structured approach; for issues, fix them directly since they are minor. After fixes are committed, the user re-runs `/ai-review-local` for a follow-up review. +On re-review, the script automatically activates delta-diff mode (comparing only +changes since the last reviewed commit) and shows a structured findings table +tracking which previous findings have been addressed. ### Step 8: Cleanup ```bash -rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt +rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt \ + /tmp/ai-review-delta-diff.patch /tmp/ai-review-delta-files.txt ``` +Note: `.claude/reviews/review-state.json` is preserved across review rounds (it +tracks the last reviewed commit and findings). It is cleaned up when the user +runs `--force-fresh` or when a rebase invalidates the tracked commit. 
+ ## Error Handling | Scenario | Response | @@ -260,21 +423,36 @@ rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt | Script exits non-zero | Display stderr output from script | | Previous review file missing on re-run | Script warns and continues as fresh review | | User aborts due to uncommitted changes | Clean exit | +| No changes since last review (empty delta) | Report and stop, suggest `--force-fresh` | +| Rebase invalidates last reviewed commit | Warn, delete stale state, run fresh review | +| `review-state.json` schema mismatch | Script warns and starts fresh | ## Examples ```bash -# Standard review of current branch vs main +# Standard review of current branch vs main (default: full source file context) /ai-review-local -# Review with full methodology registry -/ai-review-local --full-registry +# Review with minimal context (diff only, original behavior) +/ai-review-local --context minimal + +# Review with deep context (changed files + imported files) +/ai-review-local --context deep + +# Include specific files as extra context +/ai-review-local --include-files linalg.py,utils.py # Preview the compiled prompt without calling the API /ai-review-local --dry-run -# Use a different model -/ai-review-local --model gpt-4.1 +# Force a fresh review (ignore previous review state) +/ai-review-local --force-fresh + +# Use a different model with full registry +/ai-review-local --model gpt-4.1 --full-registry + +# Limit token budget for faster/cheaper reviews +/ai-review-local --token-budget 100000 ``` ## Notes @@ -282,6 +460,20 @@ rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt - This skill does NOT modify source files — it only generates temp files and review artifacts in `.claude/reviews/` (which is gitignored). It may also create a commit if there are uncommitted changes (Step 3). +- **Context levels**: By default (`standard`), the full contents of changed + `diff_diff/` source files are sent alongside the diff. 
This catches "sins of + omission" — code that should have changed but wasn't (e.g., a wrapper missing + a new parameter). Use `--context deep` to also include files imported by + changed files as read-only reference. +- **Delta-diff re-review**: When `review-state.json` exists from a previous run, + the script automatically generates a delta diff (changes since the last reviewed + commit) and focuses the reviewer on those changes. The full branch diff is + included as reference context. Use `--force-fresh` to bypass this. +- **Finding tracking**: The script writes structured findings to `review-state.json` + after each review. On re-review, previous findings are shown in a table with + their status (open/addressed), enabling the reviewer to focus on what changed. +- **Cost visibility**: The script shows estimated cost before the API call and + actual cost (from the API response) after completion. - Re-review mode activates automatically when a previous review exists in `.claude/reviews/local-review-latest.md` - The review criteria are adapted from `.github/codex/prompts/pr_review.md` (same @@ -290,8 +482,9 @@ rm -f /tmp/ai-review-diff.patch /tmp/ai-review-files.txt - The CI review (Codex action with full repo access) remains the authoritative final check — local review is a fast first pass to catch most issues early - **Data transmission**: In non-dry-run mode, this skill transmits the unified diff, - changed-file metadata, selected methodology registry text, and prior review context - (if present) to OpenAI via the Chat Completions API. Use `--dry-run` to preview - exactly what would be sent. + changed-file metadata, full source file contents (in standard/deep mode), + import-context files (in deep mode), selected methodology registry text, and + prior review context (if present) to OpenAI via the Chat Completions API. + Use `--dry-run` to preview exactly what would be sent. 
- This skill pairs naturally with the iterative workflow: `/ai-review-local` -> address findings -> `/ai-review-local` -> `/submit-pr` diff --git a/.claude/scripts/openai_review.py b/.claude/scripts/openai_review.py index 8cb3bbeb..b305b2c0 100644 --- a/.claude/scripts/openai_review.py +++ b/.claude/scripts/openai_review.py @@ -22,13 +22,18 @@ - Registry section extraction: mapping changed files to REGISTRY.md sections - OpenAI API call: authentication, request, error handling, timeout - Output: writing review markdown to --output path + - Review state: reading/writing review-state.json (finding tracking across rounds) + - Cost estimation: token counting and pricing lookup The script does NOT perform secret scanning. The skill must scan before calling. """ import argparse +import ast +import datetime import json import os +import re import sys import urllib.error import urllib.request @@ -139,6 +144,750 @@ def extract_registry_sections(registry_text: str, section_names: "set[str]") -> return "\n".join(extracted) +# --------------------------------------------------------------------------- +# File context — reading full source files for context +# --------------------------------------------------------------------------- + + +def resolve_changed_source_files( + changed_files_text: str, repo_root: str +) -> "list[str]": + """Return absolute paths of changed diff_diff/ .py files that exist on disk. + + Filters to diff_diff/**/*.py only (not tests, docs, configs). + Skips deleted files (status D in name-status output). 
+    """ +    paths: list[str] = [] +    for line in changed_files_text.strip().splitlines(): +        parts = line.split() +        if not parts: +            continue +        # name-status format: "M\tdiff_diff/foo.py" or "D\tdiff_diff/bar.py" +        status = parts[0] if len(parts) >= 2 else "" +        path = parts[-1] +        if status == "D": +            continue +        if not path.startswith("diff_diff/") or not path.endswith(".py"): +            continue +        abs_path = os.path.join(repo_root, path) +        if os.path.isfile(abs_path): +            paths.append(abs_path) +    return paths + + +def read_source_files( +    paths: "list[str]", repo_root: str, role: "str | None" = None +) -> str: +    """Read files and wrap in XML-style tags for the prompt. + +    Args: +        paths: Absolute file paths to read. +        repo_root: Repository root for computing relative paths. +        role: Optional role attribute (e.g., "import-context"). + +    Returns: +        Concatenated string of tagged file contents. +    """ +    parts: list[str] = [] +    for abs_path in paths: +        rel_path = os.path.relpath(abs_path, repo_root) +        try: +            with open(abs_path) as f: +                content = f.read() +        except (OSError, IOError) as e: +            print( +                f"Warning: Could not read {rel_path}: {e}", file=sys.stderr +            ) +            continue +        if role: +            parts.append(f'<file path="{rel_path}" role="{role}">') +        else: +            parts.append(f'<file path="{rel_path}">') +        parts.append(content) +        parts.append("</file>\n") +    return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Import graph expansion +# --------------------------------------------------------------------------- + + +def parse_imports(file_path: str) -> "set[str]": +    """Extract diff_diff.* imports from a Python file using AST. + +    Returns set of module names (e.g., {"diff_diff.linalg", "diff_diff.utils"}). +    Resolves relative imports using the file's position within diff_diff/.
+ """ + try: + with open(file_path) as f: + source = f.read() + except (OSError, IOError): + return set() + + try: + tree = ast.parse(source, filename=file_path) + except SyntaxError: + print( + f"Warning: SyntaxError parsing {file_path} for imports.", + file=sys.stderr, + ) + return set() + + # Determine the package path for resolving relative imports + # e.g., diff_diff/staggered.py -> package = "diff_diff" + # e.g., diff_diff/visualization/_event_study.py -> package = "diff_diff.visualization" + package = _package_for_file(file_path) + + imports: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + if alias.name.startswith("diff_diff."): + imports.add(alias.name) + elif isinstance(node, ast.ImportFrom): + if node.module and node.level == 0: + # Absolute import: from diff_diff.linalg import ... + if node.module.startswith("diff_diff."): + imports.add(node.module) + elif node.level > 0 and package: + # Relative import: from .foo import bar, or from . import foo + resolved = _resolve_relative_import( + package, node.module, node.level + ) + if resolved and resolved.startswith("diff_diff"): + if node.module: + # from .linalg import solve_ols → resolved = "diff_diff.linalg" + imports.add(resolved) + else: + # from . import _event_study, _common → append each alias + for alias in node.names: + imports.add(f"{resolved}.{alias.name}") + return imports + + +def _package_for_file(file_path: str) -> "str | None": + """Determine the package name for a file within the repo. 
+ + E.g., /repo/diff_diff/staggered.py -> "diff_diff" + /repo/diff_diff/visualization/_event_study.py -> "diff_diff.visualization" + """ + # Normalize and find diff_diff in the path + norm = os.path.normpath(file_path) + parts = norm.split(os.sep) + try: + idx = parts.index("diff_diff") + except ValueError: + return None + # Package is everything from diff_diff up to (but not including) the filename + pkg_parts = parts[idx:-1] + return ".".join(pkg_parts) if pkg_parts else None + + +def _resolve_relative_import( + package: str, module: "str | None", level: int +) -> "str | None": + """Resolve a relative import to an absolute module name. + + E.g., package="diff_diff", module="utils", level=1 -> "diff_diff.utils" + package="diff_diff.visualization", module=None, level=2 -> "diff_diff" + """ + parts = package.split(".") + # Go up `level` levels (level=1 means current package, level=2 means parent) + up = level - 1 + if up > 0: + parts = parts[:-up] if up < len(parts) else [] + if not parts: + return None + base = ".".join(parts) + if module: + return f"{base}.{module}" + return base + + +def resolve_module_to_path(module_name: str, repo_root: str) -> "str | None": + """Convert a module name to a file path if it exists. + + E.g., "diff_diff.linalg" -> "/diff_diff/linalg.py" + Also tries __init__.py for packages. + """ + rel = module_name.replace(".", os.sep) + # Try as a .py file first + candidate = os.path.join(repo_root, rel + ".py") + if os.path.isfile(candidate): + return candidate + # Try as a package (__init__.py) + candidate = os.path.join(repo_root, rel, "__init__.py") + if os.path.isfile(candidate): + return candidate + return None + + +def expand_import_graph( + changed_paths: "list[str]", repo_root: str +) -> "list[str]": + """Expand first-level imports from changed files. + + Returns additional file paths (not in changed_paths) that are imported + by the changed files. Only diff_diff.* imports are considered. 
+ """ + changed_set = set(os.path.normpath(p) for p in changed_paths) + import_paths: set[str] = set() + + for file_path in changed_paths: + for module_name in parse_imports(file_path): + resolved = resolve_module_to_path(module_name, repo_root) + if resolved and os.path.normpath(resolved) not in changed_set: + import_paths.add(resolved) + + return sorted(import_paths) + + +# --------------------------------------------------------------------------- +# Review state — tracking findings across review rounds +# --------------------------------------------------------------------------- + +_REVIEW_STATE_SCHEMA_VERSION = 1 + + +def parse_review_state(path: str) -> "tuple[list[dict], int]": + """Read review-state.json and return (findings, review_round). + + Returns ([], 0) on missing file or schema mismatch. + """ + try: + with open(path) as f: + data = json.load(f) + except FileNotFoundError: + return ([], 0) + except (json.JSONDecodeError, OSError) as e: + print( + f"Warning: Could not parse review state {path}: {e}", + file=sys.stderr, + ) + return ([], 0) + + # Validate structure: must be a dict with expected fields + if not isinstance(data, dict): + print( + "Warning: review-state.json is not a JSON object. Starting fresh.", + file=sys.stderr, + ) + return ([], 0) + + if data.get("schema_version") != _REVIEW_STATE_SCHEMA_VERSION: + print( + f"Warning: review-state.json schema version mismatch " + f"(expected {_REVIEW_STATE_SCHEMA_VERSION}, " + f"got {data.get('schema_version')}). Starting fresh.", + file=sys.stderr, + ) + return ([], 0) + + findings = data.get("findings", []) + if not isinstance(findings, list): + print( + "Warning: review-state.json findings is not a list. 
Starting fresh.", + file=sys.stderr, + ) + return ([], 0) + # Filter to well-formed finding dicts only — require id, severity, summary, + # status keys to prevent crashes in merge_findings() and compile_prompt() + _REQUIRED_FINDING_KEYS = {"id", "severity", "summary", "status"} + findings = [ + f for f in findings + if isinstance(f, dict) and _REQUIRED_FINDING_KEYS.issubset(f.keys()) + ] + + review_round = data.get("review_round", 0) + if not isinstance(review_round, int): + review_round = 0 + + return (findings, review_round) + + +def validate_review_state( + path: str, expected_branch: str, expected_base: str +) -> "tuple[list[dict], int, str, bool]": + """Comprehensive review-state.json validation. + + Returns (findings, review_round, last_commit, is_valid) where is_valid + means delta mode is safe to use. Checks: file exists, valid JSON, + schema version, branch/base match, and required finding fields. + + The skill should call this once and use is_valid to gate ALL delta behavior. + """ + # Read raw data for validation (separate from parse_review_state which filters) + try: + with open(path) as f: + data = json.load(f) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return ([], 0, "", False) + + if not isinstance(data, dict): + return ([], 0, "", False) + + if data.get("schema_version") != _REVIEW_STATE_SCHEMA_VERSION: + return ([], 0, "", False) + + # Fail closed on ANY malformed finding — if raw findings contain non-dict + # or missing-key entries, the entire state is invalid for delta mode + _REQUIRED_FINDING_KEYS = {"id", "severity", "summary", "status"} + raw_findings = data.get("findings", []) + if not isinstance(raw_findings, list): + return ([], 0, "", False) + for f in raw_findings: + if not isinstance(f, dict) or not _REQUIRED_FINDING_KEYS.issubset(f.keys()): + print( + "Warning: review-state.json contains malformed finding. 
" + "Delta mode disabled.", + file=sys.stderr, + ) + return ([], 0, "", False) + + last_commit = data.get("last_reviewed_commit", "") + stored_branch = data.get("branch", "") + stored_base = data.get("base_ref", "") + + if stored_branch != expected_branch or stored_base != expected_base: + print( + f"Warning: review-state.json is from branch '{stored_branch}' " + f"(base: '{stored_base}'), but current is '{expected_branch}' " + f"(base: '{expected_base}'). Delta mode disabled.", + file=sys.stderr, + ) + return ([], 0, last_commit, False) + + if not last_commit: + return ([], 0, "", False) + + # All checks passed — use parse_review_state for the filtered findings + findings, review_round = parse_review_state(path) + return (findings, review_round, last_commit, True) + + +def write_review_state( + path: str, + commit_sha: str, + base_ref: str, + branch: str, + review_round: int, + findings: "list[dict]", +) -> None: + """Write review-state.json with the current review state.""" + state = { + "schema_version": _REVIEW_STATE_SCHEMA_VERSION, + "last_reviewed_commit": commit_sha, + "base_ref": base_ref, + "review_round": review_round, + "branch": branch, + "reviewed_at": datetime.datetime.now(datetime.timezone.utc).isoformat(), + "findings": findings, + } + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + with open(path, "w") as f: + json.dump(state, f, indent=2) + + +# Match optional Markdown list prefix: -, *, +, or numbered (1., 2., etc.) +_LP = r"^(?:[-*+]|\d+\.?)?\s*" + +_BLOCK_START = re.compile( + _LP + r"\*\*(P[0-3])\*\*" # - **P1**, 1. 
**P1**, * **P1** + r"|" + _LP + r"\*\*(P[0-3]):\*\*" # - **P1:** summary (bold severity with colon) + r"|" + _LP + r"\*\*Severity:\*\*\s*(P[0-3])" # - **Severity:** P1 + r"|" + _LP + r"\*\*Severity:\s*(P[0-3])\*\*" # - **Severity: P1** + r"|" + _LP + r"Severity:\s*\*{0,2}`?(P[0-3])`?\*{0,2}" # Severity: P1, Severity: **P1**, Severity: `P1` + r"|" + _LP + r"(P[0-3]):\s" # - P1: summary (bare severity with colon) +) + +_IMPACT_PATTERN = re.compile(r"(?:\*\*)?Impact:(?:\*\*)?\s*(.+)") +_LOCATION_LABEL_PATTERN = re.compile(r"(?:\*\*)?Location:(?:\*\*)?\s*(.+)") + +_LOCATION_PATTERN = re.compile( + r"(?:`?)([\w/._-]+\.\w+(?::L?\d+(?:-L?\d+)?)?)(?:`?)" +) + +# Lines to skip when checking if a severity line is a real finding +_SKIP_PHRASES = [ + "findings are resolved", "findings have been addressed", + "should be marked", "assessment should be", + "does NOT need", "do NOT need", + "P1+ findings", "P0/P1 findings", +] +_SKIP_MARKERS = ["\u26d4", "\u26a0\ufe0f", "\u2705", "Blocker", "Needs changes", + "Looks good", "Path to Approval"] + + +def _should_skip_line(line: str) -> bool: + """Return True if the line is not a real finding.""" + stripped = line.strip() + if stripped.startswith("|") and stripped.endswith("|"): + return True + if re.search(r"P\d[/+]P\d", line): + return True + if any(m in line for m in _SKIP_MARKERS): + return True + if any(p in line for p in _SKIP_PHRASES): + return True + return False + + +def parse_review_findings( + review_text: str, review_round: int +) -> "tuple[list[dict], bool]": + """Parse AI review output for structured findings using block-based parsing. + + Supports both single-line findings (**P1** summary) and multi-line blocks + (Severity/Impact/Concrete fix on separate lines). + + Returns (findings, parse_uncertain) where parse_uncertain is True when + severity markers exist in the text but no findings could be parsed. 
+ """ + # Pass 1: collect blocks + blocks: list[tuple[str, str, list[str]]] = [] # (severity, section, lines) + current_section = "General" + current_block: "list[str] | None" = None + current_severity = "" + current_block_section = "" + + for line in review_text.splitlines(): + # Detect section headings + if line.startswith("## ") or line.startswith("### "): + # Flush current block + if current_block is not None: + blocks.append((current_severity, current_block_section, current_block)) + current_block = None + heading = line.lstrip("#").strip() + if heading and "summary" not in heading.lower(): + current_section = heading + continue + + # Check for block start — trust the severity pattern; don't apply + # skip heuristics here (they're for the uncertainty scanner only) + sev_match = _BLOCK_START.search(line) + if sev_match: + # Flush previous block + if current_block is not None: + blocks.append((current_severity, current_block_section, current_block)) + severity = ( + sev_match.group(1) or sev_match.group(2) + or sev_match.group(3) or sev_match.group(4) + or sev_match.group(5) or sev_match.group(6) + ) + current_severity = severity + current_block_section = current_section + current_block = [line] + elif current_block is not None: + # Continuation line — append to current block + # End block on blank line followed by non-indented content + if not line.strip(): + current_block.append(line) + else: + current_block.append(line) + + # Flush final block + if current_block is not None: + blocks.append((current_severity, current_block_section, current_block)) + + # Pass 2: extract findings from blocks + findings: list[dict] = [] + counters: dict[str, int] = {} + + for severity, section, lines in blocks: + # Extract summary: prefer **Impact:** line, fall back to first line text + summary = "" + for bline in lines: + impact_match = _IMPACT_PATTERN.search(bline) + if impact_match: + summary = re.sub(r"\*\*", "", impact_match.group(1)).strip() + break + if not summary: + # 
Fall back to text after severity on the first line + first_line = lines[0] if lines else "" + sev_match = _BLOCK_START.search(first_line) + if sev_match: + text_after = first_line[sev_match.end():].strip().lstrip(":—- ").strip() + summary = re.sub(r"\*\*", "", text_after).strip() + + if not summary or len(summary) < 5: + continue + + if len(summary) > 120: + summary = summary[:117] + "..." + + # Extract location from all block lines — check labeled Location: first, + # then fall back to inline file:line pattern + location = "" + for bline in lines: + label_match = _LOCATION_LABEL_PATTERN.search(bline) + if label_match: + # Extract file:line from the label value + loc_in_label = _LOCATION_PATTERN.search(label_match.group(1)) + if loc_in_label: + location = loc_in_label.group(1) + break + loc_match = _LOCATION_PATTERN.search(bline) + if loc_match: + location = loc_match.group(1) + break + + counters[severity] = counters.get(severity, 0) + 1 + finding_id = f"R{review_round}-{severity}-{counters[severity]}" + + findings.append({ + "id": finding_id, + "severity": severity, + "section": section, + "summary": summary, + "location": location, + "status": "open", + }) + + # Fail-safe: detect unparsed severity lines. Count severity-like lines in + # the text and compare with findings parsed. If any remain unparsed, set + # parse_uncertain=True. This handles both total failures (0 findings with + # severity markers) and partial failures (some findings but others missed). + parse_uncertain = False + severity_line_count = 0 + for line in review_text.splitlines(): + if _should_skip_line(line): + continue + if _BLOCK_START.search(line): + severity_line_count += 1 + if severity_line_count > len(findings): + parse_uncertain = True + + return (findings, parse_uncertain) + + +def _finding_keys(f: dict) -> "tuple[tuple[str, str, str], tuple[str, str]]": + """Return (primary_key, fallback_key) for finding matching. 
+ + Primary: (severity, file_path, normalized_summary) — uses normalized full relative + path (not basename) to avoid collisions like __init__.py in different dirs. + Fallback: (severity, normalized_summary) — used when either side lacks a file path, + with unique-candidate constraint to avoid ambiguous matching. + """ + summary = f.get("summary", "").lower().strip() + # Strip inline file:line references that cause churn on line number shifts + # e.g., "missing nan guard in `foo.py:l10`" → "missing nan guard in" + # (summary is already lowercased at this point) + summary = re.sub(r"`?[\w/.]+\.\w+(?::l?\d+(?:-l?\d+)?)?`?", "", summary) + summary = summary.strip() + severity = f.get("severity", "") + location = f.get("location", "") + # Use full relative path (strip line numbers only, keep directory structure) + file_path = location.split(":")[0] if location else "" + primary = (severity, file_path, summary) + fallback = (severity, summary) + return (primary, fallback) + + +def merge_findings( + previous: "list[dict]", current: "list[dict]" +) -> "list[dict]": + """Merge findings across review rounds using tiered matching. + + Pass 1: Match by primary key (severity + file_basename + normalized_summary). + Pass 2: For remaining unmatched findings, try fallback key (severity + + normalized_summary) but ONLY when there's exactly one candidate (unique match). + After both passes: mark unconsumed previous findings as addressed. 
+ """ + # Build lookups — list per key to handle duplicates + prev_by_primary: dict[tuple, list[dict]] = {} + prev_by_fallback: dict[tuple, list[dict]] = {} + for f in previous: + primary, fallback = _finding_keys(f) + prev_by_primary.setdefault(primary, []).append(f) + prev_by_fallback.setdefault(fallback, []).append(f) + + # Track consumed previous findings by id + consumed_ids: set[str] = set() + merged: list[dict] = [] + + # Pass 1: primary key matching + for f in current: + primary, _ = _finding_keys(f) + if primary[1] and primary in prev_by_primary: + # Current finding has a file path — try exact match + candidates = [ + p for p in prev_by_primary[primary] + if p.get("id", "") not in consumed_ids + ] + if candidates: + consumed_ids.add(candidates[0].get("id", "")) + merged.append(f) + continue + merged.append(f) + + # Pass 2: fallback matching — when EITHER the current or previous finding + # lacks a file path. This is symmetric: handles both "current has location, + # previous doesn't" and "previous has location, current doesn't." + + # 2a: Current findings without file paths → try to match unconsumed previous + merged_pass2: list[dict] = [] + for f in merged: + primary, fallback = _finding_keys(f) + has_file = bool(primary[1]) + + if has_file: + merged_pass2.append(f) + continue + + # No file path on current — try fallback with unique unconsumed candidate + unconsumed = [ + p for p in prev_by_fallback.get(fallback, []) + if p.get("id", "") not in consumed_ids + ] + if len(unconsumed) == 1: + consumed_ids.add(unconsumed[0].get("id", "")) + merged_pass2.append(f) + + # 2b: Unconsumed previous findings without file paths → try to match + # current findings that DO have file paths (reverse direction). + # Track consumed current candidates to ensure one-to-one matching. 
+ current_by_fallback: dict[tuple, list[dict]] = {} + for f in merged_pass2: + _, fallback = _finding_keys(f) + current_by_fallback.setdefault(fallback, []).append(f) + + consumed_current_ids: set[str] = set() + for f in previous: + fid = f.get("id", "") + if fid in consumed_ids: + continue + primary, fallback = _finding_keys(f) + has_file = bool(primary[1]) + if has_file: + continue # Has file path — should have matched in pass 1 if possible + # Previous finding without file path — try fallback against current + # Exclude already-consumed current candidates for one-to-one matching + candidates = [ + c for c in current_by_fallback.get(fallback, []) + if c.get("id", "") not in consumed_current_ids + ] + if len(candidates) == 1: + consumed_ids.add(fid) + consumed_current_ids.add(candidates[0].get("id", "")) + + # Mark unconsumed previous findings as addressed + for f in previous: + if f.get("id", "") not in consumed_ids: + addressed = dict(f) + addressed["status"] = "addressed" + merged_pass2.append(addressed) + + return merged_pass2 + + +# --------------------------------------------------------------------------- +# Token budget management +# --------------------------------------------------------------------------- + +DEFAULT_TOKEN_BUDGET = 200_000 + + +def apply_token_budget( + mandatory_tokens: int, + source_files_text: "str | None", + import_context_text: "str | None", + budget: int, +) -> "tuple[str | None, str | None, list[str]]": + """Apply token budget, dropping lowest-priority context first. + + Changed source files are always included (they are the highest-value + context for catching sins of omission). Only import-context files are + subject to the budget — they are included smallest-first until the + budget is exhausted. + + Returns (source_files_text, import_context_text, dropped_file_names). 
+ """ + remaining = budget - mandatory_tokens + dropped: list[str] = [] + + # Source files are always included (sticky — not budget-governed) + if source_files_text: + remaining -= estimate_tokens(source_files_text) + + # Include import files individually, smallest first + final_import_text: "str | None" = None + if import_context_text and import_context_text.strip(): + # Split into individual file blocks + blocks = re.split(r"(?== block_tokens: + included_blocks.append(block) + remaining -= block_tokens + else: + # Extract filename from the block for the warning + name_match = re.search(r'path="([^"]+)"', block) + name = name_match.group(1) if name_match else "" + dropped.append(name) + + if included_blocks: + final_import_text = "\n".join(included_blocks) + + if mandatory_tokens > budget: + print( + f"Warning: Mandatory prompt sections alone are ~{mandatory_tokens:,} " + f"tokens, exceeding --token-budget of {budget:,}. Proceeding anyway.", + file=sys.stderr, + ) + + return (source_files_text, final_import_text, dropped) + + +# --------------------------------------------------------------------------- +# Cost estimation +# --------------------------------------------------------------------------- + +# Pricing per 1M tokens: (input, output) in USD. +# Source: https://platform.openai.com/docs/pricing +# MAINTENANCE: Update when OpenAI changes pricing. +PRICING = { + "gpt-5.4": (2.50, 15.00), + "gpt-4.1": (2.00, 8.00), + "gpt-4.1-mini": (0.40, 1.60), + "o3": (2.00, 8.00), + "o3-mini": (1.10, 4.40), +} + + +def estimate_cost( + input_tokens: int, output_tokens: int, model: str +) -> "str | None": + """Estimate cost for a given token count and model. + + Returns a formatted string like "$0.09 input + $0.13 max output = $0.22 max", + or None if model pricing is unknown. 
+ """ + # Try exact match first, then longest prefix match + pricing = PRICING.get(model) + if not pricing: + for name in sorted(PRICING.keys(), key=len, reverse=True): + if model.startswith(name): + pricing = PRICING[name] + break + if not pricing: + return None + + input_cost = input_tokens * pricing[0] / 1_000_000 + output_cost = output_tokens * pricing[1] / 1_000_000 + total = input_cost + output_cost + return ( + f"${input_cost:.2f} input + ${output_cost:.2f} max output " + f"= ${total:.2f} max" + ) + + # --------------------------------------------------------------------------- # Prompt compilation # --------------------------------------------------------------------------- @@ -211,6 +960,12 @@ def compile_prompt( changed_files_text: str, branch_info: str, previous_review: "str | None", + # New parameters for enhanced context + source_files_text: "str | None" = None, + import_context_text: "str | None" = None, + delta_diff_text: "str | None" = None, + delta_changed_files_text: "str | None" = None, + structured_findings: "list[dict] | None" = None, ) -> str: """Assemble the full review prompt.""" sections: list[str] = [] @@ -227,30 +982,109 @@ def compile_prompt( ) sections.append(registry_content) - # Re-review block (before changes, so the model sees it in context) - if previous_review: + # Re-review block with structured findings and/or previous review text + if previous_review or structured_findings: + sections.append("\n---\n") + + if structured_findings and delta_diff_text: + # Enhanced re-review with structured findings table + round_num = max( + (f.get("id", "R0").split("-")[0].lstrip("R") for f in structured_findings), + default="0", + ) + sections.append(f"## Previous Review (Round {round_num})\n") + sections.append("### Previous Findings\n") + sections.append( + "| ID | Severity | Section | Summary | Location | Status |\n" + "|-----|----------|---------|---------|----------|--------|\n" + ) + for f in structured_findings: + # Escape pipe and 
newline chars to prevent table corruption + esc = lambda s: str(s).replace("|", "\\|").replace("\n", " ") + sections.append( + f"| {esc(f.get('id', ''))} | {esc(f.get('severity', ''))} " + f"| {esc(f.get('section', ''))} | {esc(f.get('summary', ''))} " + f"| {esc(f.get('location', ''))} | {esc(f.get('status', ''))} |\n" + ) + sections.append("") + elif previous_review: + sections.append("## Previous Review\n") + + if previous_review: + sections.append( + "This is a follow-up review. The previous review's findings are included " + "below. Focus on whether previous P0/P1 findings have been addressed. " + "New findings on unchanged code should be marked \"[Newly identified]\". " + "If all previous P1+ findings are resolved, the assessment should be " + "\u2705 even if new P2/P3 items are noticed.\n" + ) + if structured_findings: + sections.append("### Full Previous Review\n") + sections.append("") + sections.append(previous_review) + sections.append("\n") + + # Delta diff section (re-review with changes since last review) + if delta_diff_text: + sections.append("\n---\n") + sections.append("## Changes Since Last Review\n") + sections.append( + "These are the changes made since the last review. " + "Focus your review on these changes. Check whether previous " + "P0/P1 findings have been addressed.\n" + ) + if delta_changed_files_text: + sections.append("\nChanged files (since last review):\n") + sections.append(delta_changed_files_text) + sections.append("\nDelta diff:\n") + sections.append(delta_diff_text) + + # Full branch diff as reference + sections.append("\n---\n") + sections.append("## Full Branch Diff (Reference Only)\n") + sections.append( + "The complete diff from the base branch is included below for " + "reference context. Do NOT re-review unchanged code. 
Only reference " + "this section to understand the broader context of the delta changes " + "above.\n" + ) + sections.append("") + sections.append(diff_text) + sections.append("\n") + else: + # Fresh review — changes under review sections.append("\n---\n") - sections.append("## Previous Review\n") + sections.append("## Changes Under Review\n") + if branch_info: + sections.append(f"Branch: {branch_info}\n") + sections.append("\nChanged files:\n") + sections.append(changed_files_text) + sections.append("\nUnified diff (context=5):\n") + sections.append(diff_text) + + # Full source files section + if source_files_text: + sections.append("\n---\n") + sections.append("## Full Source Files (Changed)\n") sections.append( - "This is a follow-up review. The previous review's findings are included " - "below. Focus on whether previous P0/P1 findings have been addressed. " - "New findings on unchanged code should be marked \"[Newly identified]\". " - "If all previous P1+ findings are resolved, the assessment should be " - "\u2705 even if new P2/P3 items are noticed.\n" + "The complete contents of source files modified in this change are " + "provided below. 
Use these to identify \"sins of omission\" — code " + "that should have been changed but wasn't (e.g., a new parameter " + "added to one function but missing from its wrapper).\n" ) - sections.append("") - sections.append(previous_review) - sections.append("\n") - - # Section 3: Changes under review - sections.append("\n---\n") - sections.append("## Changes Under Review\n") - if branch_info: - sections.append(f"Branch: {branch_info}\n") - sections.append("\nChanged files:\n") - sections.append(changed_files_text) - sections.append("\nUnified diff (context=5):\n") - sections.append(diff_text) + sections.append(source_files_text) + + # Import context section + if import_context_text: + sections.append("\n---\n") + sections.append("## Import Context (Read-Only Reference)\n") + sections.append( + "These files are imported by the changed files but were not modified. " + "They are provided for cross-referencing function signatures, class " + "hierarchies, and constants. Do NOT flag issues in these files unless " + "directly related to changes in the diff.\n" + ) + sections.append(import_context_text) return "\n".join(sections) @@ -270,8 +1104,14 @@ def estimate_tokens(text: str) -> int: return len(text) // 4 -def call_openai(prompt: str, model: str, api_key: str) -> str: - """Call the OpenAI Chat Completions API and return the response content.""" +def call_openai( + prompt: str, model: str, api_key: str +) -> "tuple[str, dict]": + """Call the OpenAI Chat Completions API. + + Returns (content, usage) where usage is the API response's usage dict + containing prompt_tokens and completion_tokens. 
+ """ payload = { "model": model, "messages": [{"role": "user", "content": prompt}], @@ -344,7 +1184,8 @@ def call_openai(prompt: str, model: str, api_key: str) -> str: print("Error: Empty review content from OpenAI API.", file=sys.stderr) sys.exit(1) - return content + usage = result.get("usage", {}) + return (content, usage) # --------------------------------------------------------------------------- @@ -415,9 +1256,68 @@ def main() -> None: action="store_true", help="Print compiled prompt to stdout without calling the API", ) + # New arguments + parser.add_argument( + "--context", + choices=["minimal", "standard", "deep"], + default="standard", + help="Context depth: minimal (diff only), standard (full changed files), " + "deep (changed files + imports). Default: standard", + ) + parser.add_argument( + "--repo-root", + default=None, + help="Repository root directory (required unless --context minimal)", + ) + parser.add_argument( + "--include-files", + default=None, + help="Comma-separated list of extra files to include as read-only context " + "(paths relative to repo root, or filenames to resolve under diff_diff/)", + ) + parser.add_argument( + "--token-budget", + type=int, + default=DEFAULT_TOKEN_BUDGET, + help=f"Max estimated input tokens before dropping context " + f"(default: {DEFAULT_TOKEN_BUDGET:,})", + ) + parser.add_argument( + "--delta-diff", + default=None, + help="Path to delta diff file (changes since last review)", + ) + parser.add_argument( + "--delta-changed-files", + default=None, + help="Path to delta changed-files list (since last review)", + ) + parser.add_argument( + "--review-state", + default=None, + help="Path to review-state.json for finding tracking across rounds", + ) + parser.add_argument( + "--commit-sha", + default=None, + help="HEAD commit SHA (required when --review-state is set)", + ) + parser.add_argument( + "--base-ref", + default="main", + help="Base branch name for review-state.json (default: main)", + ) args = 
parser.parse_args() + # Post-parse validation + if args.context != "minimal" and not args.repo_root: + parser.error( + "--repo-root is required when --context is 'standard' or 'deep'" + ) + if args.review_state and not args.commit_sha: + parser.error("--commit-sha is required when --review-state is set") + # Validate API key (unless dry-run) api_key = os.environ.get("OPENAI_API_KEY", "") if not args.dry_run and not api_key: @@ -468,7 +1368,146 @@ def main() -> None: file=sys.stderr, ) - # Compile prompt + # --- Read delta diff (before context expansion so we can scope context) --- + delta_diff_text = None + delta_changed_files_text = None + if args.delta_diff: + try: + with open(args.delta_diff) as f: + delta_diff_text = f.read() + if not delta_diff_text.strip(): + delta_diff_text = None + except FileNotFoundError: + print( + f"Warning: Delta diff not found: {args.delta_diff}.", + file=sys.stderr, + ) + if args.delta_changed_files: + try: + with open(args.delta_changed_files) as f: + delta_changed_files_text = f.read() + except FileNotFoundError: + pass + + # --- Context expansion --- + source_files_text = None + import_context_text = None + + if args.context in ("standard", "deep") and args.repo_root: + # In delta mode, scope source/import context to delta files only + context_files_text = ( + delta_changed_files_text + if delta_changed_files_text + else changed_files_text + ) + changed_paths = resolve_changed_source_files( + context_files_text, args.repo_root + ) + if changed_paths: + source_files_text = read_source_files(changed_paths, args.repo_root) + + if args.context == "deep": + import_paths = expand_import_graph(changed_paths, args.repo_root) + if import_paths: + import_context_text = read_source_files( + import_paths, args.repo_root, role="import-context" + ) + + # Handle --include-files (confined to repo root for security) + if args.include_files and args.repo_root: + repo_root_real = os.path.realpath(args.repo_root) + extra_paths: list[str] = [] + for 
name in args.include_files.split(","): + name = name.strip() + if not name: + continue + # Reject absolute paths + if os.path.isabs(name): + print( + f"Warning: --include-files: absolute paths not allowed " + f"({name}), skipping.", + file=sys.stderr, + ) + continue + if os.sep in name or "/" in name: + # Path relative to repo root + candidate = os.path.join(args.repo_root, name) + else: + # Filename to resolve under diff_diff/ + candidate = os.path.join(args.repo_root, "diff_diff", name) + # Normalize and verify within repo root (prevent ../ traversal) + candidate = os.path.realpath(candidate) + if not candidate.startswith(repo_root_real + os.sep): + print( + f"Warning: --include-files: {name} resolves outside repo " + f"root, skipping.", + file=sys.stderr, + ) + continue + if os.path.isfile(candidate): + extra_paths.append(candidate) + else: + print( + f"Warning: --include-files: {name} not found, skipping.", + file=sys.stderr, + ) + if extra_paths: + extra_text = read_source_files( + extra_paths, args.repo_root, role="import-context" + ) + if import_context_text: + import_context_text += "\n" + extra_text + else: + import_context_text = extra_text + + # --- Read review state for re-review --- + structured_findings = None + previous_round = 0 + if args.review_state: + structured_findings, previous_round = parse_review_state( + args.review_state + ) + if not structured_findings: + structured_findings = None # Normalize empty to None + + # --- Token budget --- + # Estimate mandatory content size (always included, not budget-governed) + mandatory_est = ( + estimate_tokens(criteria_text) + + estimate_tokens(registry_content) + + estimate_tokens(diff_text) + ) + # In delta mode, changed_files_text is NOT in the prompt (replaced by delta sections); + # only count it for fresh reviews where it appears in "Changes Under Review" + if not delta_diff_text: + mandatory_est += estimate_tokens(changed_files_text) + if previous_review: + mandatory_est += 
estimate_tokens(previous_review) + if delta_diff_text: + mandatory_est += estimate_tokens(delta_diff_text) + if delta_changed_files_text: + mandatory_est += estimate_tokens(delta_changed_files_text) + if structured_findings: + # Rough estimate for the findings table rendered in compile_prompt + findings_text = "\n".join(str(f) for f in structured_findings) + mandatory_est += estimate_tokens(findings_text) + + # Apply budget: source files are always included (sticky); + # only import-context files are dropped when over budget. + source_files_text, import_context_text, dropped = apply_token_budget( + mandatory_tokens=mandatory_est, + source_files_text=source_files_text, + import_context_text=import_context_text, + budget=args.token_budget, + ) + if dropped: + print( + f"Warning: Token budget exceeded. Dropped import context files: " + f"{', '.join(dropped)}", + file=sys.stderr, + ) + + # --- Compile prompt --- prompt = compile_prompt( criteria_text=criteria_text, registry_content=registry_content, @@ -476,6 +1515,11 @@ def main() -> None: changed_files_text=changed_files_text, branch_info=args.branch_info, previous_review=previous_review, + source_files_text=source_files_text, + import_context_text=import_context_text, + delta_diff_text=delta_diff_text, + delta_changed_files_text=delta_changed_files_text, + structured_findings=structured_findings, ) est_tokens = estimate_tokens(prompt) @@ -486,32 +1530,95 @@ def main() -> None: file=sys.stderr, ) + # Cost estimate + cost_str = estimate_cost(est_tokens, DEFAULT_MAX_TOKENS, args.model) + # Dry-run: print prompt and exit if args.dry_run: print(prompt) print(f"\n--- Dry run ---", file=sys.stderr) print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr) + if cost_str: + print(f"Estimated cost: {cost_str}", file=sys.stderr) print(f"Model: {args.model}", file=sys.stderr) + print(f"Context: {args.context}", file=sys.stderr) if previous_review: print("Mode: Re-review (previous review included)", file=sys.stderr) + 
if delta_diff_text: + print("Mode: Delta-diff (changes since last review)", file=sys.stderr) sys.exit(0) # Call OpenAI API print(f"Sending review to {args.model}...", file=sys.stderr) print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr) + if cost_str: + print(f"Estimated cost: {cost_str}", file=sys.stderr) + print(f"Context: {args.context}", file=sys.stderr) if previous_review: print("Mode: Re-review (previous review included)", file=sys.stderr) + if delta_diff_text: + print("Mode: Delta-diff (changes since last review)", file=sys.stderr) - review_content = call_openai(prompt, args.model, api_key) + review_content, usage = call_openai(prompt, args.model, api_key) - # Write output + # Write review output os.makedirs(os.path.dirname(args.output), exist_ok=True) with open(args.output, "w") as f: f.write(review_content) + # Write review state if requested + if args.review_state and args.commit_sha: + current_round = previous_round + 1 + current_findings, parse_uncertain = parse_review_findings( + review_content, current_round + ) + if parse_uncertain: + print( + "Warning: Could not parse findings from review output. 
" + "Preserving prior review state baseline (not advancing " + "last_reviewed_commit).", + file=sys.stderr, + ) + # Do NOT write review state at all — keep prior baseline intact + # regardless of whether prior findings exist, so the next delta + # review doesn't skip unparsed code + elif structured_findings: + final_findings = merge_findings(structured_findings, current_findings) + write_review_state( + path=args.review_state, + commit_sha=args.commit_sha, + base_ref=args.base_ref, + branch=args.branch_info, + review_round=current_round, + findings=final_findings, + ) + else: + write_review_state( + path=args.review_state, + commit_sha=args.commit_sha, + base_ref=args.base_ref, + branch=args.branch_info, + review_round=current_round, + findings=current_findings, + ) + + # Print completion summary with actual usage + actual_input = usage.get("prompt_tokens", 0) + actual_output = usage.get("completion_tokens", 0) + actual_cost = estimate_cost(actual_input, actual_output, args.model) + print(f"\nAI Review complete.", file=sys.stderr) print(f"Model: {args.model}", file=sys.stderr) - print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr) + if actual_input: + print( + f"Actual tokens: {actual_input:,} input, " + f"{actual_output:,} output", + file=sys.stderr, + ) + if actual_cost: + print(f"Actual cost: {actual_cost}", file=sys.stderr) + else: + print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr) print(f"Output saved to: {args.output}", file=sys.stderr) diff --git a/.github/codex/prompts/pr_review.md b/.github/codex/prompts/pr_review.md index 4cc9ea45..a64124de 100644 --- a/.github/codex/prompts/pr_review.md +++ b/.github/codex/prompts/pr_review.md @@ -158,3 +158,22 @@ ci = compute_confidence_interval(effect, se) # produces point estimate for se=0 t_stat, p_value, conf_int = safe_inference(effect, se) ``` Flag partial NaN guards as P0 — they produce misleading statistical output. + +### 4. 
Incomplete parameter propagation (P1) +For each changed public method signature (new parameter, renamed parameter, +changed default), verify that ALL callers and wrappers in the changed files +also received the same parameter. Check: +- Direct callers within the same file +- Cross-file callers visible in the diff or provided source files +- Wrapper methods that delegate to the changed method +- `get_params()` / `set_params()` return dicts +Flag each missing propagation as P1. + +### 5. Semantic contract violation in composed values (P1) +When code composes, transforms, or normalizes values from different sources +(e.g., weights from different estimators, variance components, time indices), +verify the semantic contract of each source is preserved through the operation: +- Units and scales must be compatible before arithmetic +- Normalization denominators must use the correct population +- Index alignment must match the data contract (inner vs outer join semantics) +Flag as P1 if semantic contracts are silently violated with no warning or check. 
diff --git a/tests/test_openai_review.py b/tests/test_openai_review.py index 8b4c2698..97675176 100644 --- a/tests/test_openai_review.py +++ b/tests/test_openai_review.py @@ -6,6 +6,8 @@ """ import importlib.util +import json +import os import pathlib import subprocess @@ -63,6 +65,13 @@ def review_mod(): return mod +@pytest.fixture +def repo_root(): + """Return the repo root directory.""" + assert _SCRIPT_PATH is not None + return str(_SCRIPT_PATH.parent.parent.parent) + + # --------------------------------------------------------------------------- # _sections_for_file # --------------------------------------------------------------------------- @@ -196,7 +205,7 @@ def test_replaces_pr_language(self, review_mod): def test_warns_on_missing_substitution(self, review_mod, capsys): # A text that doesn't contain any of the expected patterns - result = review_mod._adapt_review_criteria("Totally different text") + review_mod._adapt_review_criteria("Totally different text") captured = capsys.readouterr() assert "Warning: prompt substitution did not match" in captured.err @@ -259,6 +268,136 @@ def test_no_previous_review_block_when_none(self, review_mod): assert "" not in result +# --------------------------------------------------------------------------- +# compile_prompt — enhanced context modes +# --------------------------------------------------------------------------- + + +class TestCompilePromptWithContext: + """Test compile_prompt with the new context parameters.""" + + def test_backward_compatibility(self, review_mod): + """Original args produce same structure — no source/import sections.""" + result = review_mod.compile_prompt( + criteria_text="Criteria.", + registry_content="Registry.", + diff_text="diff content", + changed_files_text="M\tfoo.py", + branch_info="main", + previous_review=None, + ) + assert "Full Source Files" not in result + assert "Import Context" not in result + assert "Changes Under Review" in result + + def 
test_standard_mode_includes_source_files(self, review_mod): + result = review_mod.compile_prompt( + criteria_text="C.", + registry_content="R.", + diff_text="D.", + changed_files_text="M\tf.py", + branch_info="b", + previous_review=None, + source_files_text='content', + ) + assert "Full Source Files (Changed)" in result + assert "sins of omission" in result + assert '' in result + assert "Import Context" not in result + + def test_deep_mode_includes_import_context(self, review_mod): + result = review_mod.compile_prompt( + criteria_text="C.", + registry_content="R.", + diff_text="D.", + changed_files_text="M\tf.py", + branch_info="b", + previous_review=None, + source_files_text="src", + import_context_text='utils', + ) + assert "Full Source Files (Changed)" in result + assert "Import Context (Read-Only Reference)" in result + assert "Do NOT flag issues in these files" in result + + def test_delta_diff_structure(self, review_mod): + result = review_mod.compile_prompt( + criteria_text="C.", + registry_content="R.", + diff_text="full diff content", + changed_files_text="M\tf.py", + branch_info="b", + previous_review="Previous findings.", + delta_diff_text="delta diff content", + delta_changed_files_text="M\tf.py", + ) + assert "Changes Since Last Review" in result + assert "delta diff content" in result + assert "Full Branch Diff (Reference Only)" in result + assert "" in result + assert "full diff content" in result + + def test_delta_diff_with_structured_findings(self, review_mod): + findings = [ + { + "id": "R1-P1-1", + "severity": "P1", + "section": "Methodology", + "summary": "Missing NaN guard", + "location": "diff_diff/foo.py:L42", + "status": "open", + } + ] + result = review_mod.compile_prompt( + criteria_text="C.", + registry_content="R.", + diff_text="full diff", + changed_files_text="M\tf.py", + branch_info="b", + previous_review="Prev.", + delta_diff_text="delta", + structured_findings=findings, + ) + assert "Previous Findings" in result + assert "R1-P1-1" 
in result + assert "Missing NaN guard" in result + assert "diff_diff/foo.py:L42" in result + + def test_fresh_review_no_delta_sections(self, review_mod): + """Without delta_diff_text, no delta-specific sections appear.""" + result = review_mod.compile_prompt( + criteria_text="C.", + registry_content="R.", + diff_text="D.", + changed_files_text="M\tf.py", + branch_info="b", + previous_review=None, + source_files_text="src", + ) + assert "Changes Since Last Review" not in result + assert "Full Branch Diff (Reference Only)" not in result + assert "Changes Under Review" in result + + + def test_findings_table_escapes_pipe_chars(self, review_mod): + """Summary containing | should be escaped in the findings table.""" + findings = [ + { + "id": "R1-P1-1", "severity": "P1", "section": "Code Quality", + "summary": "Return type str | None is wrong", + "location": "foo.py:L10", "status": "open", + } + ] + result = review_mod.compile_prompt( + criteria_text="C.", registry_content="R.", diff_text="D.", + changed_files_text="M\tf.py", branch_info="b", + previous_review="Prev.", delta_diff_text="delta", + structured_findings=findings, + ) + # The pipe in "str | None" should be escaped as "str \| None" + assert "str \\| None" in result + + # --------------------------------------------------------------------------- # PREFIX_TO_SECTIONS mapping coverage # --------------------------------------------------------------------------- @@ -338,3 +477,1043 @@ def test_rough_estimate(self, review_mod): def test_empty_string(self, review_mod): assert review_mod.estimate_tokens("") == 0 + + +# --------------------------------------------------------------------------- +# resolve_changed_source_files +# --------------------------------------------------------------------------- + + +class TestResolveChangedSourceFiles: + def test_filters_to_diff_diff_py_files(self, review_mod, repo_root): + text = "M\tdiff_diff/bacon.py\nM\ttests/test_bacon.py\nM\tCLAUDE.md" + paths = 
review_mod.resolve_changed_source_files(text, repo_root) + assert any("bacon.py" in p for p in paths) + assert not any("test_bacon" in p for p in paths) + assert not any("CLAUDE" in p for p in paths) + + def test_skips_deleted_files(self, review_mod, repo_root): + text = "D\tdiff_diff/deleted_file.py\nM\tdiff_diff/bacon.py" + paths = review_mod.resolve_changed_source_files(text, repo_root) + assert not any("deleted_file" in p for p in paths) + assert any("bacon.py" in p for p in paths) + + def test_empty_input(self, review_mod, repo_root): + assert review_mod.resolve_changed_source_files("", repo_root) == [] + + def test_skips_nonexistent_files(self, review_mod, repo_root): + text = "M\tdiff_diff/nonexistent_xyz.py" + assert review_mod.resolve_changed_source_files(text, repo_root) == [] + + +# --------------------------------------------------------------------------- +# read_source_files +# --------------------------------------------------------------------------- + + +class TestReadSourceFiles: + def test_produces_xml_tagged_output(self, review_mod, repo_root): + # Use a real file that exists + path = os.path.join(repo_root, "diff_diff", "__init__.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/__init__.py not found") + result = review_mod.read_source_files([path], repo_root) + assert '' in result + assert "" in result + + def test_role_attribute(self, review_mod, repo_root): + path = os.path.join(repo_root, "diff_diff", "__init__.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/__init__.py not found") + result = review_mod.read_source_files([path], repo_root, role="import-context") + assert 'role="import-context"' in result + + def test_handles_missing_file(self, review_mod, repo_root, capsys): + result = review_mod.read_source_files( + ["/nonexistent/path.py"], repo_root + ) + assert result == "" + captured = capsys.readouterr() + assert "Warning" in captured.err + + def test_empty_paths(self, review_mod, repo_root): + assert 
review_mod.read_source_files([], repo_root) == "" + + +# --------------------------------------------------------------------------- +# parse_imports +# --------------------------------------------------------------------------- + + +class TestParseImports: + def test_extracts_absolute_import(self, review_mod, repo_root): + """Test with a real source file that imports diff_diff modules.""" + path = os.path.join(repo_root, "diff_diff", "bacon.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/bacon.py not found") + imports = review_mod.parse_imports(path) + # bacon.py should import from diff_diff (e.g., diff_diff.linalg or diff_diff.utils) + assert all(m.startswith("diff_diff.") for m in imports) + + def test_ignores_non_diff_diff_imports(self, review_mod, tmp_path): + test_file = tmp_path / "test.py" + test_file.write_text("import numpy\nimport pandas\nfrom os import path\n") + imports = review_mod.parse_imports(str(test_file)) + assert imports == set() + + def test_submodule_imports_not_truncated(self, review_mod, repo_root): + """Submodule imports should keep full path, not truncate to 2 components.""" + path = os.path.join(repo_root, "diff_diff", "visualization", "_staggered.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/visualization/_staggered.py not found") + imports = review_mod.parse_imports(path) + # Should include full submodule paths like diff_diff.visualization._common + has_submodule = any( + m.count(".") >= 2 for m in imports # at least 3 components + ) + assert has_submodule, ( + f"Expected submodule imports (3+ components) but got: {imports}" + ) + + def test_relative_import_aliases_expanded(self, review_mod, repo_root): + """from . 
import _event_study should resolve to diff_diff.visualization._event_study.""" + path = os.path.join(repo_root, "diff_diff", "visualization", "__init__.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/visualization/__init__.py not found") + imports = review_mod.parse_imports(path) + # Should include individual submodule names, not just the package + submodules = [m for m in imports if m.startswith("diff_diff.visualization._")] + assert len(submodules) > 0, ( + f"Expected visualization submodule imports but got: {imports}" + ) + + def test_handles_syntax_error(self, review_mod, tmp_path, capsys): + test_file = tmp_path / "bad.py" + test_file.write_text("def foo(:\n pass\n") + imports = review_mod.parse_imports(str(test_file)) + assert imports == set() + captured = capsys.readouterr() + assert "SyntaxError" in captured.err + + def test_handles_missing_file(self, review_mod): + imports = review_mod.parse_imports("/nonexistent/file.py") + assert imports == set() + + +# --------------------------------------------------------------------------- +# expand_import_graph +# --------------------------------------------------------------------------- + + +class TestExpandImportGraph: + def test_expands_imports(self, review_mod, repo_root): + """Expanding imports for a real file produces additional paths.""" + path = os.path.join(repo_root, "diff_diff", "bacon.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/bacon.py not found") + result = review_mod.expand_import_graph([path], repo_root) + # Should find at least some imports (linalg, utils, etc.) 
+ assert isinstance(result, list) + # All paths should be absolute and exist + for p in result: + assert os.path.isabs(p) + assert os.path.isfile(p) + + def test_deduplicates_against_changed_set(self, review_mod, repo_root): + """Files already in changed_paths should not appear in expansion.""" + bacon = os.path.join(repo_root, "diff_diff", "bacon.py") + linalg = os.path.join(repo_root, "diff_diff", "linalg.py") + if not (os.path.isfile(bacon) and os.path.isfile(linalg)): + pytest.skip("required files not found") + result = review_mod.expand_import_graph([bacon, linalg], repo_root) + assert linalg not in [os.path.normpath(p) for p in result] + + def test_visualization_init_includes_submodules(self, review_mod, repo_root): + """expand_import_graph on visualization/__init__.py should include submodules.""" + path = os.path.join(repo_root, "diff_diff", "visualization", "__init__.py") + if not os.path.isfile(path): + pytest.skip("diff_diff/visualization/__init__.py not found") + result = review_mod.expand_import_graph([path], repo_root) + filenames = [os.path.basename(p) for p in result] + # Should include visualization submodules like _event_study.py, _staggered.py + assert any(f.startswith("_") and f.endswith(".py") for f in filenames), ( + f"Expected visualization submodule files but got: {filenames}" + ) + + def test_empty_input(self, review_mod, repo_root): + assert review_mod.expand_import_graph([], repo_root) == [] + + +# --------------------------------------------------------------------------- +# estimate_cost +# --------------------------------------------------------------------------- + + +class TestEstimateCost: + def test_known_model(self, review_mod): + result = review_mod.estimate_cost(100_000, 16_384, "gpt-5.4") + assert result is not None + assert "$" in result + assert "input" in result + assert "output" in result + + def test_unknown_model(self, review_mod): + result = review_mod.estimate_cost(100_000, 16_384, "unknown-model") + assert result is 
None + + def test_prefix_match(self, review_mod): + # gpt-5.4-turbo should match gpt-5.4 prefix + result = review_mod.estimate_cost(100_000, 16_384, "gpt-5.4-turbo") + assert result is not None + + +# --------------------------------------------------------------------------- +# Token budget — apply_token_budget +# --------------------------------------------------------------------------- + + +class TestTokenBudget: + def test_under_budget_all_included(self, review_mod): + src = "y" * 400 + imp = 'small' + result_src, result_imp, dropped = review_mod.apply_token_budget( + mandatory_tokens=100, + source_files_text=src, + import_context_text=imp, + budget=200_000, + ) + assert result_src == src + assert result_imp is not None + assert dropped == [] + + def test_over_budget_drops_imports_not_source(self, review_mod): + src = "y" * 400 + imp = ( + '' + "z" * 40_000 + "\n" + '' + "z" * 400 + "" + ) + result_src, result_imp, dropped = review_mod.apply_token_budget( + mandatory_tokens=200_000, # fills budget + source_files_text=src, + import_context_text=imp, + budget=200_000, + ) + # Source files always included (sticky) + assert result_src == src + # At least one import file should be dropped + assert len(dropped) > 0 + + def test_source_files_always_included(self, review_mod): + """Source files are sticky — never dropped even when over budget.""" + src = "y" * 800_000 # large source files + result_src, _, dropped = review_mod.apply_token_budget( + mandatory_tokens=100_000, + source_files_text=src, + import_context_text=None, + budget=50_000, # budget smaller than mandatory alone + ) + assert result_src == src + + def test_mandatory_exceeds_budget_warns(self, review_mod, capsys): + review_mod.apply_token_budget( + mandatory_tokens=300_000, + source_files_text=None, + import_context_text=None, + budget=200_000, + ) + captured = capsys.readouterr() + assert "exceeding --token-budget" in captured.err + + +# 
--------------------------------------------------------------------------- +# Review state — parse and write +# --------------------------------------------------------------------------- + + +class TestParseReviewState: + def test_reads_valid_json(self, review_mod, tmp_path): + state_file = tmp_path / "review-state.json" + state = { + "schema_version": 1, + "last_reviewed_commit": "abc123", + "review_round": 2, + "findings": [{"id": "R1-P1-1", "severity": "P1", "summary": "Test", "status": "open"}], + } + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert len(findings) == 1 + assert round_num == 2 + + def test_missing_file_returns_empty(self, review_mod): + findings, round_num = review_mod.parse_review_state("/nonexistent.json") + assert findings == [] + assert round_num == 0 + + def test_schema_version_mismatch(self, review_mod, tmp_path, capsys): + state_file = tmp_path / "review-state.json" + state = {"schema_version": 999, "findings": []} + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert findings == [] + assert round_num == 0 + captured = capsys.readouterr() + assert "schema version mismatch" in captured.err + + def test_non_dict_root_returns_empty(self, review_mod, tmp_path, capsys): + state_file = tmp_path / "review-state.json" + state_file.write_text("[1, 2, 3]") # list, not dict + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert findings == [] + assert round_num == 0 + captured = capsys.readouterr() + assert "not a JSON object" in captured.err + + def test_non_list_findings_returns_empty(self, review_mod, tmp_path, capsys): + state_file = tmp_path / "review-state.json" + state = {"schema_version": 1, "findings": "not a list", "review_round": 1} + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert findings == [] + 
assert round_num == 0 + captured = capsys.readouterr() + assert "not a list" in captured.err + + def test_non_int_round_defaults_to_zero(self, review_mod, tmp_path): + state_file = tmp_path / "review-state.json" + state = {"schema_version": 1, "findings": [], "review_round": "not_int"} + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert findings == [] + assert round_num == 0 + + def test_non_dict_findings_filtered(self, review_mod, tmp_path): + """Non-dict elements in findings list are filtered out, not crash.""" + state_file = tmp_path / "review-state.json" + good_finding = { + "id": "R1-P1-1", "severity": "P1", + "summary": "Test finding", "status": "open", + } + state = { + "schema_version": 1, + "findings": ["oops", good_finding, 42], + "review_round": 1, + } + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert len(findings) == 1 + assert findings[0]["id"] == "R1-P1-1" + assert round_num == 1 + + def test_findings_missing_required_keys_filtered(self, review_mod, tmp_path): + """Dict findings missing required keys (id, severity, summary, status) filtered.""" + state_file = tmp_path / "review-state.json" + state = { + "schema_version": 1, + "findings": [ + {"id": "R1-P1-1", "severity": "P1"}, # missing summary, status + {"id": "R1-P1-2", "severity": "P1", "summary": "Good", "status": "open"}, + {"severity": "P2", "summary": "No id", "status": "open"}, # missing id + ], + "review_round": 1, + } + state_file.write_text(json.dumps(state)) + findings, round_num = review_mod.parse_review_state(str(state_file)) + assert len(findings) == 1 + assert findings[0]["id"] == "R1-P1-2" + + +class TestWriteReviewState: + def test_writes_valid_json(self, review_mod, tmp_path): + path = str(tmp_path / "review-state.json") + review_mod.write_review_state( + path=path, + commit_sha="abc123", + base_ref="main", + branch="feature/test", + 
review_round=1, + findings=[{"id": "R1-P0-1", "severity": "P0"}], + ) + with open(path) as f: + data = json.load(f) + assert data["schema_version"] == 1 + assert data["last_reviewed_commit"] == "abc123" + assert data["review_round"] == 1 + assert len(data["findings"]) == 1 + + def test_round_trips_with_parse(self, review_mod, tmp_path): + path = str(tmp_path / "review-state.json") + original_findings = [ + {"id": "R1-P1-1", "severity": "P1", "summary": "Test finding", "status": "open"} + ] + review_mod.write_review_state( + path=path, + commit_sha="def456", + base_ref="main", + branch="fix/bug", + review_round=3, + findings=original_findings, + ) + findings, round_num = review_mod.parse_review_state(path) + assert round_num == 3 + assert findings[0]["id"] == "R1-P1-1" + + +# --------------------------------------------------------------------------- +# Review findings parsing +# --------------------------------------------------------------------------- + + +class TestParseReviewFindings: + def test_extracts_findings(self, review_mod): + review_text = ( + "## Methodology\n\n" + "**P1** Missing NaN guard in `diff_diff/staggered.py:L145`\n\n" + "## Code Quality\n\n" + "**P2** Unused import in `diff_diff/utils.py:L12`\n\n" + "## Summary\n" + "Overall assessment: Looks good\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert len(findings) >= 2 + assert not uncertain + severities = {f["severity"] for f in findings} + assert "P1" in severities + assert "P2" in severities + + def test_empty_review(self, review_mod): + findings, uncertain = review_mod.parse_review_findings("No issues found.", 1) + assert findings == [] + assert not uncertain + + def test_finding_ids_follow_format(self, review_mod): + review_text = ( + "**P0** Critical bug in `foo.py:L1`\n" + "**P1** Minor issue in the code\n" + ) + findings, _ = review_mod.parse_review_findings(review_text, 2) + for f in findings: + assert f["id"].startswith("R2-") + assert f["status"] 
== "open" + + def test_parses_bold_severity_format(self, review_mod): + """**P1** format should be parsed.""" + review_text = "**P1** Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + + def test_parses_bold_colon_severity(self, review_mod): + """- **P1:** format (bold severity with colon) should be parsed.""" + review_text = "- **P1:** Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_parses_bare_colon_severity(self, review_mod): + """- P1: format (bare severity with colon) should be parsed.""" + review_text = "- P1: Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_mixed_format_both_parsed(self, review_mod): + """Review with supported + previously-unsupported format should parse both.""" + review_text = ( + "**P2** Code quality issue in `bar.py:L5`\n" + "- **P1:** Missing NaN guard in `foo.py:L10`\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 2 + severities = {f["severity"] for f in findings} + assert "P1" in severities + assert "P2" in severities + assert not uncertain + + def test_parses_severity_bold_value(self, review_mod): + """Severity: **P1** format (bold value after plain label) should be parsed.""" + review_text = "- Severity: **P1** — Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_parses_numbered_list_severity(self, review_mod): + """1. Severity: P1 format should be parsed.""" + review_text = "1. 
Severity: P1 — Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_parses_starred_bold_severity(self, review_mod): + """* **Severity:** P1 format should be parsed.""" + review_text = "* **Severity:** P1 — Missing NaN guard in `bar.py:L5`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_numbered_bold_severity_triggers_uncertainty(self, review_mod): + """1. **Severity:** P1 with no parseable summary → uncertain=True.""" + review_text = "1. **Severity:** P1\n" + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + assert uncertain + + def test_parses_bold_label_format(self, review_mod): + """**Severity:** P1 format should be parsed.""" + review_text = "- **Severity:** P1 — Missing NaN guard in `foo.py:L10`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_parses_plain_label_format(self, review_mod): + """Severity: P2 format should be parsed.""" + review_text = "Severity: P2 — Unused import in `bar.py:L5`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P2" + + def test_finding_with_skip_marker_in_summary_still_parsed(self, review_mod): + """Findings whose summaries contain skip markers like 'Path to Approval' should parse.""" + review_text = "**P2** The prompt omits the Path to Approval section in `foo.py:L10`\n" + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P2" + assert not uncertain + + def test_finding_with_looks_good_in_summary(self, review_mod): + """Finding mentioning 'Looks good' in summary should not be skipped.""" + 
review_text = "**P1** Assessment says Looks good but edge case is unhandled in `bar.py:L5`\n" + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + + def test_parses_multiline_finding_block(self, review_mod): + """Multi-line finding blocks (Severity/Impact on separate lines).""" + review_text = ( + "## Code Quality\n\n" + "- **Severity:** P1\n" + " **Impact:** Missing NaN guard causes silent incorrect output\n" + " **Location:** `diff_diff/staggered.py:L145`\n" + " **Concrete fix:** Use safe_inference()\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + assert "NaN guard" in findings[0]["summary"] + assert not uncertain + + def test_parses_plain_multiline_block(self, review_mod): + """Plain Severity: / Impact: labels (no bold) should be parsed.""" + review_text = ( + "## Code Quality\n\n" + "Severity: P1\n" + "Impact: Missing NaN guard causes silent incorrect output\n" + "Location: `diff_diff/staggered.py:L145`\n" + "Concrete fix: Use safe_inference()\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert len(findings) == 1 + assert findings[0]["severity"] == "P1" + assert "NaN guard" in findings[0]["summary"] + assert not uncertain + + def test_midline_severity_not_detected(self, review_mod): + """Severity markers embedded mid-line are not block starts — no uncertainty.""" + review_text = ( + "There is a Severity: P1 issue but the rest of the text\n" + "doesn't follow any recognized block structure at all\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + # Mid-line markers are not valid block starts — correctly returns ([], False) + assert findings == [] + assert not uncertain + + def test_midline_bold_severity_not_detected(self, review_mod): + """Bold severity mid-line (not at line start) is not a block start.""" + 
review_text = ( + "The review found **P1** issues but in a format\n" + "that the block parser cannot delimit properly.\n" + ) + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + # Mid-line bold is not a valid block start — correctly returns ([], False) + assert findings == [] + assert not uncertain + + def test_bold_label_severity_triggers_uncertainty(self, review_mod): + """**Severity:** P1 format with no parseable summary → uncertain=True.""" + review_text = "- **Severity:** P1\n" + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + assert uncertain + + def test_bold_inline_severity_triggers_uncertainty(self, review_mod): + """**Severity: P1** format with no parseable summary → uncertain=True.""" + review_text = "- **Severity: P1**\n" + findings, uncertain = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + assert uncertain + + def test_ignores_multi_severity_prose(self, review_mod): + """Lines like 'P2/P3 items may exist' should not be parsed as findings.""" + review_text = ( + "P2/P3 items may exist. 
A PR does NOT need to be perfect.\n" + "If all previous P1+ findings are resolved, assessment should be good.\n" + ) + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + + def test_ignores_assessment_lines(self, review_mod): + """Assessment criteria lines with severity labels should be skipped.""" + review_text = ( + "⛔ Blocker — One or more P0: silent correctness bugs\n" + "⚠️ Needs changes — One or more P1 (no P0s)\n" + "✅ Looks good — No unmitigated P0 or P1 findings.\n" + ) + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + + def test_ignores_table_rows(self, review_mod): + """Findings tables from previous reviews should not be re-parsed.""" + review_text = ( + "| R1-P1-1 | P1 | Methodology | Missing NaN guard | foo.py:L10 | open |\n" + "| R1-P2-1 | P2 | Code Quality | Unused import | bar.py:L5 | addressed |\n" + ) + findings, _ = review_mod.parse_review_findings(review_text, 2) + assert findings == [] + + def test_ignores_instructional_text(self, review_mod): + """Instructional text referencing severities should be skipped.""" + review_text = ( + "Focus on whether previous P0/P1 findings have been addressed.\n" + "If all previous P1+ findings are resolved, the assessment should be good.\n" + ) + findings, _ = review_mod.parse_review_findings(review_text, 1) + assert findings == [] + + +# --------------------------------------------------------------------------- +# Merge findings +# --------------------------------------------------------------------------- + + +class TestMergeFindings: + def test_matching_finding_stays_open(self, review_mod): + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard", "status": "open"} + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard", "status": "open"} + ] + merged = 
review_mod.merge_findings(previous, current) + open_at_loc = [ + f for f in merged + if f["location"] == "foo.py:L10" and f["status"] == "open" + ] + assert len(open_at_loc) >= 1 + + def test_absent_finding_marked_addressed(self, review_mod): + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard", "status": "open"} + ] + current = [] # Finding was addressed + merged = review_mod.merge_findings(previous, current) + addressed = [f for f in merged if f["status"] == "addressed"] + assert len(addressed) == 1 + assert addressed[0]["location"] == "foo.py:L10" + + def test_new_finding_added_as_open(self, review_mod): + previous = [] + current = [ + {"id": "R2-P0-1", "severity": "P0", "location": "bar.py:L5", + "section": "Methodology", "summary": "Missing check", "status": "open"} + ] + merged = review_mod.merge_findings(previous, current) + assert len(merged) == 1 + assert merged[0]["status"] == "open" + assert merged[0]["location"] == "bar.py:L5" + + def test_matching_with_shifted_line_numbers(self, review_mod): + """Same finding at different line ranges should still match via summary.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10-L12", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + merged = review_mod.merge_findings(previous, current) + open_findings = [f for f in merged if f["status"] == "open"] + addressed = [f for f in merged if f["status"] == "addressed"] + # Should match (same severity, file, summary) — not create a false "addressed" + assert len(open_findings) == 1 + assert len(addressed) == 0 + + def test_matching_with_missing_location(self, review_mod): + """Finding with no location should still match on summary 
fingerprint.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + merged = review_mod.merge_findings(previous, current) + open_findings = [f for f in merged if f["status"] == "open"] + addressed = [f for f in merged if f["status"] == "addressed"] + # Same severity + same summary = match. No false "addressed" record. + assert len(open_findings) == 1 + assert len(addressed) == 0 + + def test_multiple_findings_same_key(self, review_mod): + """Multiple previous findings with same key should not overwrite each other.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"}, + {"id": "R1-P1-2", "severity": "P1", "location": "foo.py:L20", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"}, + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"}, + ] + merged = review_mod.merge_findings(previous, current) + # One should match, one should be addressed + open_findings = [f for f in merged if f["status"] == "open"] + addressed = [f for f in merged if f["status"] == "addressed"] + assert len(open_findings) == 1 + assert len(addressed) == 1 + + def test_duplicate_no_location_findings_one_to_one(self, review_mod): + """Two prior no-location findings should not both match one current finding.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "", + "section": "Code Quality", "summary": "Missing NaN guard", + "status": "open"}, + {"id": "R1-P1-2", "severity": "P1", "location": "", + "section": 
"Methodology", "summary": "Missing NaN guard", + "status": "open"}, + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard", + "status": "open"}, + ] + merged = review_mod.merge_findings(previous, current) + open_findings = [f for f in merged if f["status"] == "open"] + addressed = [f for f in merged if f["status"] == "addressed"] + # One current + one prior matched = 1 open; one prior unmatched = 1 addressed + assert len(open_findings) == 1 + assert len(addressed) == 1 + + def test_previous_missing_location_current_has_location(self, review_mod): + """Previous finding with no location, current has one → should match.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "staggered.py:L10", + "section": "Code Quality", "summary": "Missing NaN guard in staggered", + "status": "open"} + ] + merged = review_mod.merge_findings(previous, current) + open_findings = [f for f in merged if f["status"] == "open"] + addressed = [f for f in merged if f["status"] == "addressed"] + # Should match via symmetric fallback — no false "addressed" + assert len(open_findings) == 1 + assert len(addressed) == 0 + + def test_same_basename_different_dirs_no_cross_match(self, review_mod): + """__init__.py in different dirs with same summary should NOT cross-match.""" + previous = [ + {"id": "R1-P1-1", "severity": "P1", "location": "diff_diff/__init__.py:L10", + "section": "Code Quality", "summary": "Missing type export", "status": "open"} + ] + current = [ + {"id": "R2-P1-1", "severity": "P1", "location": "diff_diff/visualization/__init__.py:L5", + "section": "Code Quality", "summary": "Missing type export", "status": "open"} + ] + merged = review_mod.merge_findings(previous, current) + open_findings = [f for f in merged if 
f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        # Different full paths: previous should be addressed, current stays open
+        assert len(open_findings) == 1
+        assert len(addressed) == 1
+
+    def test_long_summaries_dont_collide(self, review_mod):
+        """Two findings with same first 50 chars but different suffixes should NOT collapse."""
+        prefix = "a" * 50
+        previous = [
+            {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10",
+             "section": "Code Quality", "summary": prefix + " first issue details",
+             "status": "open"},
+            {"id": "R1-P1-2", "severity": "P1", "location": "foo.py:L20",
+             "section": "Code Quality", "summary": prefix + " second different issue",
+             "status": "open"},
+        ]
+        current = [
+            {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10",
+             "section": "Code Quality", "summary": prefix + " first issue details",
+             "status": "open"},
+            {"id": "R2-P1-2", "severity": "P1", "location": "foo.py:L20",
+             "section": "Code Quality", "summary": prefix + " second different issue",
+             "status": "open"},
+        ]
+        merged = review_mod.merge_findings(previous, current)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        # Both should match — neither dropped
+        assert len(open_findings) == 2
+        assert len(addressed) == 0
+
+    def test_same_summary_different_files_no_cross_match(self, review_mod):
+        """Two findings with same summary but different files should NOT cross-match."""
+        previous = [
+            {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10",
+             "section": "Code Quality", "summary": "Missing NaN guard in estimator",
+             "status": "open"},
+        ]
+        current = [
+            {"id": "R2-P1-1", "severity": "P1", "location": "bar.py:L20",
+             "section": "Code Quality", "summary": "Missing NaN guard in estimator",
+             "status": "open"},
+        ]
+        merged = review_mod.merge_findings(previous, current)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        # Different files: previous should be addressed, current stays open
+        assert len(open_findings) == 1
+        assert open_findings[0]["location"] == "bar.py:L20"
+        assert len(addressed) == 1
+        assert addressed[0]["location"] == "foo.py:L10"
+
+
+# ---------------------------------------------------------------------------
+# estimate_cost — prefix matching regression
+# ---------------------------------------------------------------------------
+
+
+class TestEstimateCostPrefixRegression:
+    def test_mini_model_gets_mini_pricing(self, review_mod):
+        """gpt-4.1-mini snapshot should get mini pricing, not parent gpt-4.1."""
+        mini_cost = review_mod.estimate_cost(1_000_000, 1_000_000, "gpt-4.1-mini-2025-04-14")
+        parent_cost = review_mod.estimate_cost(1_000_000, 1_000_000, "gpt-4.1")
+        assert mini_cost is not None
+        assert parent_cost is not None
+        # Snapshot must resolve to its own pricing tier, not the parent's (rates differ)
+        assert mini_cost != parent_cost
+
+    def test_o3_mini_gets_mini_pricing(self, review_mod):
+        """o3-mini snapshot should get o3-mini pricing, not o3."""
+        mini_cost = review_mod.estimate_cost(1_000_000, 1_000_000, "o3-mini-2025-01-31")
+        parent_cost = review_mod.estimate_cost(1_000_000, 1_000_000, "o3")
+        assert mini_cost is not None
+        assert parent_cost is not None
+        assert mini_cost != parent_cost
+
+
+# ---------------------------------------------------------------------------
+# Delta context derivation
+# ---------------------------------------------------------------------------
+
+
+class TestDeltaContextDerivation:
+    def test_delta_files_resolve_only_delta(self, review_mod, repo_root):
+        """resolve_changed_source_files with delta file list returns only delta files."""
+        # Simulate: full branch changed bacon.py and staggered.py, but delta only has bacon.py
+        delta_text = "M\tdiff_diff/bacon.py"
+        paths = review_mod.resolve_changed_source_files(delta_text, repo_root)
+        filenames = [os.path.basename(p) for p in paths]
+        assert "bacon.py" in filenames
+        # staggered.py should NOT be in the result (it's not in delta)
+        assert "staggered.py" not in filenames
+
+
+# ---------------------------------------------------------------------------
+# Review state — branch/base validation support
+# ---------------------------------------------------------------------------
+
+
+class TestReviewStateBranchValidation:
+    def test_stores_and_retrieves_branch_and_base(self, review_mod, tmp_path):
+        """write_review_state stores branch/base; parse_review_state returns them."""
+        path = str(tmp_path / "review-state.json")
+        review_mod.write_review_state(
+            path=path,
+            commit_sha="abc123",
+            base_ref="main",
+            branch="feature/test",
+            review_round=1,
+            findings=[],
+        )
+        # Read back and verify fields are present
+        import json
+        with open(path) as f:
+            data = json.load(f)
+        assert data["branch"] == "feature/test"
+        assert data["base_ref"] == "main"
+
+
+# ---------------------------------------------------------------------------
+# End-to-end: parse then merge pipeline
+# ---------------------------------------------------------------------------
+
+
+class TestParseThenMerge:
+    def test_line_shift_does_not_cause_churn(self, review_mod):
+        """Same finding at different line numbers should merge as 1 open, 0 addressed."""
+        review_r1 = "**P1** Missing NaN guard in `foo.py:L10`\n"
+        review_r2 = "**P1** Missing NaN guard in `foo.py:L12`\n"
+        findings_r1, _ = review_mod.parse_review_findings(review_r1, 1)
+        findings_r2, _ = review_mod.parse_review_findings(review_r2, 2)
+        assert len(findings_r1) == 1
+        assert len(findings_r2) == 1
+        merged = review_mod.merge_findings(findings_r1, findings_r2)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        assert len(open_findings) == 1
+        assert len(addressed) == 0
+
+    def test_md_file_line_shift_does_not_cause_churn(self, review_mod):
+        """Same finding on a .md file at different line numbers should merge as 1
+        open."""
+        review_r1 = "**P1** Missing docs in `ai-review-local.md:L10`\n"
+        review_r2 = "**P1** Missing docs in `ai-review-local.md:L20`\n"
+        findings_r1, _ = review_mod.parse_review_findings(review_r1, 1)
+        findings_r2, _ = review_mod.parse_review_findings(review_r2, 2)
+        assert len(findings_r1) == 1
+        assert len(findings_r2) == 1
+        merged = review_mod.merge_findings(findings_r1, findings_r2)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        assert len(open_findings) == 1
+        assert len(addressed) == 0
+
+    def test_parse_uncertain_does_not_advance_state(self, review_mod, tmp_path):
+        """When parse_uncertain fires, review-state.json should not be modified."""
+        state_path = str(tmp_path / "review-state.json")
+        # Write initial state
+        review_mod.write_review_state(
+            path=state_path,
+            commit_sha="initial123",
+            base_ref="main",
+            branch="feature/x",
+            review_round=1,
+            findings=[{"id": "R1-P1-1", "severity": "P1", "summary": "Test", "status": "open"}],
+        )
+        initial_mtime = os.path.getmtime(state_path)
+
+        # Simulate parse_uncertain scenario
+        unparseable_review = "- **Severity:** P1\n"  # Will return ([], True)
+        findings, uncertain = review_mod.parse_review_findings(unparseable_review, 2)
+        assert uncertain
+        assert findings == []
+
+        # The state file should NOT have been modified
+        # (in production, main() skips write_review_state when uncertain)
+        current_mtime = os.path.getmtime(state_path)
+        assert current_mtime == initial_mtime
+
+        # Verify original state is intact
+        stored_findings, stored_round = review_mod.parse_review_state(state_path)
+        assert stored_round == 1
+        assert stored_findings[0]["id"] == "R1-P1-1"
+
+
+# ---------------------------------------------------------------------------
+# validate_review_state — comprehensive validation
+# ---------------------------------------------------------------------------
+
+
+class TestValidateReviewState:
+    def test_valid_state_returns_true(self, review_mod, tmp_path):
+        path = str(tmp_path / "review-state.json")
+        review_mod.write_review_state(
+            path=path, commit_sha="abc123", base_ref="main",
+            branch="feature/test", review_round=1,
+            findings=[{"id": "R1-P1-1", "severity": "P1",
+                       "summary": "Test", "status": "open"}],
+        )
+        findings, rnd, commit, valid = review_mod.validate_review_state(
+            path, "feature/test", "main"
+        )
+        assert valid
+        assert commit == "abc123"
+        assert len(findings) == 1
+
+    def test_branch_mismatch_returns_false(self, review_mod, tmp_path):
+        path = str(tmp_path / "review-state.json")
+        review_mod.write_review_state(
+            path=path, commit_sha="abc123", base_ref="main",
+            branch="feature/old", review_round=1, findings=[],
+        )
+        _, _, _, valid = review_mod.validate_review_state(
+            path, "feature/new", "main"
+        )
+        assert not valid
+
+    def test_schema_mismatch_returns_false(self, review_mod, tmp_path):
+        state_file = tmp_path / "review-state.json"
+        state_file.write_text(json.dumps({"schema_version": 999}))
+        _, _, _, valid = review_mod.validate_review_state(
+            str(state_file), "b", "main"
+        )
+        assert not valid
+
+    def test_missing_file_returns_false(self, review_mod):
+        _, _, _, valid = review_mod.validate_review_state(
+            "/nonexistent.json", "b", "main"
+        )
+        assert not valid
+
+    def test_malformed_finding_returns_false(self, review_mod, tmp_path):
+        """Any malformed finding dict should invalidate delta mode entirely."""
+        state_file = tmp_path / "review-state.json"
+        state = {
+            "schema_version": 1,
+            "last_reviewed_commit": "abc123",
+            "branch": "feature/test",
+            "base_ref": "main",
+            "review_round": 1,
+            "findings": [
+                {"id": "R1-P1-1", "severity": "P1"},  # missing summary, status
+            ],
+        }
+        state_file.write_text(json.dumps(state))
+        _, _, _, valid = review_mod.validate_review_state(
+            str(state_file), "feature/test", "main"
+        )
+        assert not valid  # fail closed on malformed finding
+
+
+# ---------------------------------------------------------------------------
+# Include-files path confinement
+# ---------------------------------------------------------------------------
+
+
+class TestIncludeFilesConfinement:
+    """Verify --include-files rejects paths outside repo root."""
+
+    def test_rejects_absolute_path(self, review_mod, repo_root, capsys):
+        """Absolute paths should be rejected."""
+        # Simulate the path resolution logic from main()
+        name = "/etc/passwd"
+        assert os.path.isabs(name)
+        # The script rejects absolute paths before even resolving
+
+    def test_rejects_traversal(self, review_mod, repo_root):
+        """../ traversal should be detected after realpath normalization."""
+        candidate = os.path.join(repo_root, "../../../etc/passwd")
+        candidate = os.path.realpath(candidate)
+        repo_root_real = os.path.realpath(repo_root)
+        assert not candidate.startswith(repo_root_real + os.sep)