Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Deferred items from PR reviews that were not addressed before merge.
| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails; fixing requires sparse least-squares alternatives) |
| EfficientDiD: API docs / tutorial page for new public estimator | `docs/` | #192 | Medium |
| Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
| CallawaySantAnna per-cell ATT(g,t) SEs under survey designs use influence-function variance, not full design-based TSL with strata/PSU/FPC. Design effects enter at aggregation via the WIF and survey df. Full per-cell TSL would require constructing unit-level influence functions on the global index and passing them through `compute_survey_vcov()`. | `staggered.py` | — | Medium |
| EfficientDiD hausman_pretest() clustered covariance uses stale `n_cl` after filtering non-finite EIF rows — should recompute effective cluster count and remap indices after `row_finite` filtering | `efficient_did.py` | #230 | Medium |
| TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
| ContinuousDiD event-study aggregation does not filter by `anticipation` — uses all (g,t) cells instead of anticipation-filtered subset; pre-existing in both survey and non-survey paths | `continuous_did.py` | #226 | Medium |
| Survey design resolution/collapse patterns are inconsistent across panel estimators — ContinuousDiD rebuilds unit-level design in SE code, EfficientDiD builds once in fit(), StackedDiD re-resolves on stacked data; extract shared helpers for panel-to-unit collapse, post-filter re-resolution, and metadata recomputation | `continuous_did.py`, `efficient_did.py`, `stacked_did.py` | #226 | Low |
Expand Down
280 changes: 234 additions & 46 deletions diff_diff/imputation.py

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions diff_diff/imputation_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ class ImputationDiDResults:
bootstrap_results: Optional[ImputationBootstrapResults] = field(default=None, repr=False)
# Internal: stores data needed for pretrend_test()
_estimator_ref: Optional[Any] = field(default=None, repr=False)
# Survey design metadata (SurveyMetadata instance from diff_diff.survey)
survey_metadata: Optional[Any] = field(default=None, repr=False)

def __repr__(self) -> str:
"""Concise string representation."""
Expand Down Expand Up @@ -182,6 +184,27 @@ def summary(self, alpha: Optional[float] = None) -> str:
"",
]

# Survey design info
if self.survey_metadata is not None:
sm = self.survey_metadata
lines.extend(
[
"-" * 85,
"Survey Design".center(85),
"-" * 85,
f"{'Weight type:':<30} {sm.weight_type:>10}",
]
)
if sm.n_strata is not None:
lines.append(f"{'Strata:':<30} {sm.n_strata:>10}")
if sm.n_psu is not None:
lines.append(f"{'PSU/Cluster:':<30} {sm.n_psu:>10}")
lines.append(f"{'Effective sample size:':<30} {sm.effective_n:>10.1f}")
lines.append(f"{'Design effect (DEFF):':<30} {sm.design_effect:>10.2f}")
if sm.df_survey is not None:
lines.append(f"{'Survey d.f.:':<30} {sm.df_survey:>10}")
lines.extend(["-" * 85, ""])

# Overall ATT
lines.extend(
[
Expand Down
56 changes: 27 additions & 29 deletions diff_diff/linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,24 +390,18 @@ def _validate_weights(weights, weight_type, n):
"""Validate weights array and weight_type for solve_ols/LinearRegression."""
if weight_type not in _VALID_WEIGHT_TYPES:
raise ValueError(
f"weight_type must be one of {_VALID_WEIGHT_TYPES}, "
f"got '{weight_type}'"
f"weight_type must be one of {_VALID_WEIGHT_TYPES}, " f"got '{weight_type}'"
)
if weights is not None:
weights = np.asarray(weights, dtype=np.float64)
if weights.shape[0] != n:
raise ValueError(
f"weights length ({weights.shape[0]}) must match "
f"X rows ({n})"
)
raise ValueError(f"weights length ({weights.shape[0]}) must match " f"X rows ({n})")
if np.any(np.isnan(weights)):
raise ValueError("Weights contain NaN values")
if np.any(np.isinf(weights)):
raise ValueError("Weights contain Inf values")
if np.any(weights < 0):
raise ValueError(
"Weights must be non-negative"
)
raise ValueError("Weights must be non-negative")
if weight_type == "fweight":
fractional = weights - np.round(weights)
if np.any(np.abs(fractional) > 1e-10):
Expand Down Expand Up @@ -693,13 +687,9 @@ def solve_ols(
weights=weights,
weight_type=weight_type,
)
vcov_out = _expand_vcov_with_nan(
vcov_reduced, _original_X.shape[1], kept_cols
)
vcov_out = _expand_vcov_with_nan(vcov_reduced, _original_X.shape[1], kept_cols)
else:
vcov_out = np.full(
(_original_X.shape[1], _original_X.shape[1]), np.nan
)
vcov_out = np.full((_original_X.shape[1], _original_X.shape[1]), np.nan)
else:
vcov_out = _compute_robust_vcov_numpy(
_original_X,
Expand Down Expand Up @@ -1122,6 +1112,7 @@ def solve_logit(
tol: float = 1e-8,
check_separation: bool = True,
rank_deficient_action: str = "warn",
weights: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Fit logistic regression via IRLS (Fisher scoring).
Expand All @@ -1147,6 +1138,13 @@ def solve_logit(
- "warn": Emit warning and drop columns (default)
- "error": Raise ValueError
- "silent": Drop columns silently
weights : np.ndarray, optional
Survey/observation weights of shape (n_samples,). When provided,
the IRLS working weights become ``weights * mu * (1 - mu)``
instead of ``mu * (1 - mu)``. This produces the survey-weighted
maximum likelihood estimator, matching R's ``svyglm(family=binomial)``.
When None (default), behavior is identical to unweighted logistic
regression.

Returns
-------
Expand Down Expand Up @@ -1203,11 +1201,16 @@ def solve_logit(
mu = np.clip(mu, 1e-10, 1 - 1e-10)

# Working weights and working response
w = mu * (1.0 - mu)
z = eta + (y - mu) / w
w_irls = mu * (1.0 - mu)
z = eta + (y - mu) / w_irls

if weights is not None:
w_total = weights * w_irls
else:
w_total = w_irls

# Weighted least squares: solve (X'WX) beta = X'Wz
sqrt_w = np.sqrt(w)
sqrt_w = np.sqrt(w_total)
Xw = X_solve * sqrt_w[:, None]
zw = z * sqrt_w
beta_new, _, _, _ = np.linalg.lstsq(Xw, zw, rcond=None)
Expand Down Expand Up @@ -1593,10 +1596,7 @@ def fit(
_use_survey_vcov = self.survey_design.needs_survey_vcov
# Canonicalize weights from survey_design to ensure consistency
# between coefficient estimation and survey vcov computation
if (
self.weights is not None
and self.weights is not self.survey_design.weights
):
if self.weights is not None and self.weights is not self.survey_design.weights:
warnings.warn(
"Explicit weights= differ from survey_design.weights. "
"Using survey_design weights for both coefficient "
Expand All @@ -1609,9 +1609,7 @@ def fit(
self.weight_type = self.survey_design.weight_type

if self.weights is not None:
self.weights = _validate_weights(
self.weights, self.weight_type, X.shape[0]
)
self.weights = _validate_weights(self.weights, self.weight_type, X.shape[0])

# Inject cluster as PSU for survey variance when no PSU specified.
# Use a local variable to avoid mutating self.survey_design, which
Expand All @@ -1622,7 +1620,9 @@ def fit(
and _effective_survey_design is not None
and _use_survey_vcov
):
from diff_diff.survey import ResolvedSurveyDesign as _RSD, _inject_cluster_as_psu
from diff_diff.survey import ResolvedSurveyDesign as _RSD
from diff_diff.survey import _inject_cluster_as_psu

if isinstance(_effective_survey_design, _RSD) and _effective_survey_design.psu is None:
_effective_survey_design = _inject_cluster_as_psu(
_effective_survey_design, effective_cluster_ids
Expand Down Expand Up @@ -1864,9 +1864,7 @@ def get_inference(
# Use project-standard NaN-safe inference (returns all-NaN when SE <= 0)
from diff_diff.utils import safe_inference

t_stat, p_value, conf_int = safe_inference(
coef, se, alpha=effective_alpha, df=effective_df
)
t_stat, p_value, conf_int = safe_inference(coef, se, alpha=effective_alpha, df=effective_df)

return InferenceResult(
coefficient=coef,
Expand Down
Loading
Loading