Address local AI review P2/P3 findings

igerber · claude · igerber · commit d879c88c159d · 2026-04-19T09:52:51.000-04:00
- P2 (wooldridge): Extract shared `_warn_and_fill_nan_cohort(df, cohort,
  stacklevel)` helper used by both `_filter_sample` and `fit()`. Removes
  the copy-paste warning block that was flagged as a future drift risk.

- P2 (tests): Add `test_inf_first_treat_warning_counts_rows_not_units`
  on a 4-unit x 3-period panel. 2 units carry inf across all 3 periods
  (6 inf rows, 2 inf units) — the warning must report 6, not 2, because
  `.replace(inf, 0)` is row-level.

- P3 (utils wording): The `_compute_outcome_changes` excess-drop warning
  said "gaps or NaN outcomes" but the code actually counts all NaN
  first-differences. Rephrased to "additional NaN first-differences
  (e.g. NaN outcomes or unit-period gaps upstream)" so the message
  doesn't over-claim what the helper can detect.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/utils.py b/diff_diff/utils.py
@@ -936,8 +936,9 @@ def _compute_outcome_changes(
             warnings.warn(
                 f"check_parallel_trends dropped {n_dropped} row(s) with NaN "
                 f"first-differences; {n_units_observed} are the expected "
-                f"first-period-per-unit drops, and {n_unexpected_drops} came "
-                f"from gaps or NaN outcomes. Parallel-trend statistics are "
+                f"first-period-per-unit drops, and {n_unexpected_drops} are "
+                f"additional NaN first-differences (e.g. NaN outcomes or "
+                f"unit-period gaps upstream). Parallel-trend statistics are "
                 f"computed on the remaining rows.",
                 UserWarning,
                 stacklevel=3,
diff --git a/diff_diff/wooldridge.py b/diff_diff/wooldridge.py
@@ -113,6 +113,26 @@ def _resolve_survey_for_wooldridge(survey_design, sample, cluster_ids, cluster_n
     return resolved, survey_weights, survey_weight_type, survey_metadata, df_inf
 
 
+def _warn_and_fill_nan_cohort(df: pd.DataFrame, cohort: str, stacklevel: int) -> pd.DataFrame:
+    """Fill NaN cohort with 0 (never-treated) in place; warn with the row count.
+
+    Shared by `_filter_sample` and `WooldridgeDiD.fit()` (see REGISTRY.md
+    §WooldridgeDiD, axis-E silent coercion). `stacklevel` is relative to the
+    CALLER's frame; 1 is added here so attribution matches an inline warn.
+    """
+    n_nan_cohort = int(df[cohort].isna().sum())
+    if n_nan_cohort > 0:
+        warnings.warn(
+            f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 "
+            f"and treating the corresponding units as never-treated. Pass "
+            f"an explicit never-treated marker (0) if this is not intended.",
+            UserWarning,
+            stacklevel=stacklevel + 1,  # +1 skips this helper's own frame
+        )
+    df[cohort] = df[cohort].fillna(0)
+    return df
+
+
 def _filter_sample(
     data: pd.DataFrame,
     unit: str,
@@ -129,20 +149,7 @@ def _filter_sample(
     (see _build_interaction_matrix).
     """
     df = data.copy()
-    # Normalise never-treated: fill NaN cohort with 0. Report the row count so
-    # callers can see how many rows were recategorized as never-treated — a
-    # silent recategorization here would quietly move units between the
-    # treated and control sides of the estimator (axis-E silent coercion).
-    n_nan_cohort = int(df[cohort].isna().sum())
-    if n_nan_cohort > 0:
-        warnings.warn(
-            f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 "
-            f"and treating the corresponding units as never-treated. Pass "
-            f"an explicit never-treated marker (0) if this is not intended.",
-            UserWarning,
-            stacklevel=3,
-        )
-    df[cohort] = df[cohort].fillna(0)
+    df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=3)
 
     treated_mask = df[cohort] > 0
 
@@ -409,19 +416,7 @@ def fit(
             ``NotImplementedError``.
         """
         df = data.copy()
-        # See `_filter_sample` for the analogous warning; fit() does its own
-        # fillna earlier in the pipeline so we warn here too to cover the
-        # direct-fit path.
-        n_nan_cohort = int(df[cohort].isna().sum())
-        if n_nan_cohort > 0:
-            warnings.warn(
-                f"{n_nan_cohort} row(s) have NaN cohort values; filling with 0 "
-                f"and treating the corresponding units as never-treated. Pass "
-                f"an explicit never-treated marker (0) if this is not intended.",
-                UserWarning,
-                stacklevel=2,
-            )
-        df[cohort] = df[cohort].fillna(0)
+        df = _warn_and_fill_nan_cohort(df, cohort, stacklevel=2)
 
         # 0a. Validate cohort is time-invariant within unit
         cohort_per_unit = df.groupby(unit)[cohort].nunique()
diff --git a/tests/test_continuous_did.py b/tests/test_continuous_did.py
@@ -675,6 +675,36 @@ def test_no_inf_first_treat_no_warning(self):
         inf_warnings = [x for x in w if "inf in 'first_treat'" in str(x.message)]
         assert inf_warnings == []
 
+    def test_inf_first_treat_warning_counts_rows_not_units(self):
+        """Warn with the number of inf ROWS, not the number of inf units.
+
+        `.replace(inf, 0)` acts per row, so on a multi-period panel every inf
+        row has to be counted individually in the warning text."""
+        # 4 units x 3 periods = 12 rows. Units 0 and 1 carry inf in every
+        # period -> 6 inf rows from 2 units, so the two counts disagree.
+        n_units, periods = 4, range(1, 4)
+        panel = pd.DataFrame(
+            [
+                {
+                    "unit": u, "period": p, "outcome": float(u + p),
+                    "first_treat": np.inf if u < 2 else 2.0,
+                    "dose": 0.0 if u < 2 else 1.0,
+                }
+                for u in range(n_units)
+                for p in periods
+            ]
+        )
+        estimator = ContinuousDiD()
+        expected = r"6 row\(s\) have inf in 'first_treat'"
+
+        with pytest.warns(UserWarning, match=expected):
+            try:
+                estimator.fit(panel, "outcome", "unit", "period", "first_treat", "dose")
+            except Exception:
+                # The tiny panel may be rejected by downstream validation (too
+                # few treated for OLS); only the warning's row count matters.
+                pass
+
     def test_custom_dvals(self):
         data = generate_continuous_did_data(n_units=100, n_periods=3, seed=42)
         custom_grid = np.array([1.0, 2.0, 3.0])
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -871,7 +871,7 @@ def test_warns_on_nan_outcomes_with_excess_drop_count(self):
 
         with pytest.warns(
             UserWarning,
-            match=r"check_parallel_trends dropped \d+ row\(s\).*first-period-per-unit",
+            match=r"check_parallel_trends dropped \d+ row\(s\).*additional NaN first-differences",
         ):
             _compute_outcome_changes(
                 df, outcome="outcome", time="period",