Compare Implementation of DML IRM in Causalis and DML IRM in DoubleML#
In Causalis we have the dml_ate_source() implementation, which calls dml_irm_obj.fit() from DoubleML. Let’s compare it with the native Causalis dml_ate().
Using DGP from [Linear and Nonlinear Data Generating Process benchmarking](file:///Users/ioannmartynov/PycharmProjects/Ckit/docs/_build/html/research/dgp_benchmarking.html)
import numpy as np
from typing import List, Dict, Any
from causalis.data import CausalDatasetGenerator
# Five user-level confounders; the generator draws each column from the
# listed distribution with the given parameters, in this order.
confounder_specs: List[Dict[str, Any]] = [
    {"name": "tenure_months", "dist": "normal", "mu": 24, "sd": 12},
    {"name": "avg_sessions_week", "dist": "normal", "mu": 5, "sd": 2},
    {"name": "spend_last_month", "dist": "uniform", "a": 0, "b": 200},
    {"name": "premium_user", "dist": "bernoulli", "p": 0.25},
    {"name": "urban_resident", "dist": "bernoulli", "p": 0.60},
]

# Outcome coefficients (linear, well-specified): higher tenure, engagement,
# spend, premium status and urban residency all push Y upward moderately.
beta_y_map = {
    "tenure_months": 0.05,      # ~0.6 SD shift at +1 SD (12 months)
    "avg_sessions_week": 0.60,  # strong engagement signal
    "spend_last_month": 0.005,  # scale 0..200 => up to ~1 shift
    "premium_user": 0.80,
    "urban_resident": 0.20,
}

# Treatment-score coefficients: moderate dependence on the same covariates.
beta_d_map = {
    "tenure_months": 0.08,
    "avg_sessions_week": 0.12,
    "spend_last_month": 0.004,
    "premium_user": 0.25,
    "urban_resident": 0.10,
}
def expand_beta_from_specs(specs: List[Dict[str, Any]], beta_map: Dict[str, float]) -> np.ndarray:
    """Build a coefficient vector aligned with the generator's X column order.

    Each confounder spec contributes one entry, looked up by name in
    ``beta_map``; names absent from the map default to 0.0. Raises
    ``ValueError`` for distributions this simple setup does not support.
    """
    supported = ("normal", "uniform", "bernoulli")
    coeffs: List[float] = []
    for spec in specs:
        dist = str(spec.get("dist", "normal")).lower()
        if dist not in supported:
            raise ValueError(f"Unsupported dist in this simple setup: {dist}")
        coeffs.append(beta_map.get(spec.get("name", ""), 0.0))
    return np.asarray(coeffs, dtype=float)
# Coefficient vectors aligned to the generator's column order.
beta_y = expand_beta_from_specs(confounder_specs, beta_y_map)
beta_d = expand_beta_from_specs(confounder_specs, beta_d_map)

# Linear, well-specified DGP with a constant treatment effect, no latent
# confounding, easy overlap, and roughly 20% of units treated.
gen = CausalDatasetGenerator(
    theta=0.80,                 # constant treatment effect
    tau=None,                   # use theta
    beta_y=beta_y,              # linear X -> Y coefficients
    beta_d=beta_d,              # linear X -> D coefficients
    g_y=None,
    g_d=None,                   # no nonlinearities
    alpha_y=0.0,                # no outcome intercept
    alpha_d=0.0,                # no treatment intercept
    sigma_y=1.0,                # outcome noise scale
    outcome_type="continuous",  # Gaussian Y
    confounder_specs=confounder_specs,
    u_strength_d=0.0,           # latent confounder -> treatment strength
    u_strength_y=0.0,           # latent confounder -> outcome strength
    propensity_sharpness=1.0,   # increase to make overlap harder
    target_d_rate=0.20,         # desired treatment share
    seed=123,                   # random seed for reproducibility
)

n = 10_000  # number of observations
df = gen.generate(n)

print("Treatment share ≈", df["d"].mean())

# Ground truth on the natural scale: ATE = E[tau(X)], ATT = E[tau(X) | D=1]
# (mean CATE over all units vs. over the treated only).
true_ate = float(df["cate"].mean())
print(f"Ground-truth ATE from the DGP: {true_ate:.3f}")
true_att = float(df.loc[df["d"] == 1, "cate"].mean())
print(f"Ground-truth ATT from the DGP: {true_att:.3f}")
Treatment share ≈ 0.2052
Ground-truth ATE from the DGP: 0.800
Ground-truth ATT from the DGP: 0.800
from causalis.data import CausalData

# Wrap the simulated frame: "d" is the treatment, "y" the outcome, and the
# five DGP covariates are the confounders. BUG FIX: the original listed
# "premium_user" twice, which would duplicate that column in the nuisance
# feature matrix; each confounder now appears exactly once.
causal_data = CausalData(
    df=df,
    treatment="d",
    outcome="y",
    confounders=[
        "tenure_months",
        "avg_sessions_week",
        "spend_last_month",
        "premium_user",
        "urban_resident",
    ],
)
causal_data.df.head()
| | y | d | tenure_months | avg_sessions_week | spend_last_month | premium_user | urban_resident |
|---|---|---|---|---|---|---|---|
| 0 | 1.903910 | 0.0 | 12.130544 | 4.056687 | 181.570607 | 0.0 | 0.0 |
| 1 | 3.388144 | 0.0 | 19.586560 | 1.671561 | 182.793598 | 0.0 | 0.0 |
| 2 | 8.456512 | 1.0 | 39.455103 | 5.452889 | 125.185708 | 1.0 | 1.0 |
| 3 | 5.535970 | 1.0 | 26.327693 | 5.051629 | 4.932905 | 0.0 | 1.0 |
| 4 | 4.965140 | 1.0 | 35.042771 | 4.933996 | 23.577407 | 0.0 | 0.0 |
ATE#
from causalis.inference.ate import dml_ate
from causalis.inference.ate import dml_ate_source

# ATE with the native Causalis implementation (4-fold cross-fitting).
ate_result_causalis = dml_ate(
    causal_data,
    n_folds=4,
    normalize_ipw=False,
    store_diagnostic_data=False,
    random_state=123,
)

# ATE with the DoubleML-backed implementation for comparison.
ate_result_doubleml = dml_ate_source(causal_data, n_folds=4)

print("Real ATE = 0.8")
print(f"Estimated with Causalis = {ate_result_causalis.get('coefficient')} in {ate_result_causalis.get('confidence_interval')}")
print(f"Estimated with DoubleML = {ate_result_doubleml.get('coefficient')} in {ate_result_doubleml.get('confidence_interval')}")
Real ATE = 0.8
Estimated with Causalis = 0.7506313322797796 in (0.6668061755942251, 0.8344564889653342)
Estimated with DoubleML = 0.7951243102839649 in (0.7340549132666624, 0.8561937073012674)
ATTE#
from causalis.inference.atte import dml_atte
from causalis.inference.atte import dml_atte_source

# Estimate the Average Treatment Effect on the Treated (ATTE) with Causalis.
atte_result = dml_atte(
    causal_data,
    n_folds=4,
    normalize_ipw=False,
    store_diagnostic_data=False,
    random_state=123,
)

# Estimate the ATTE with the DoubleML-backed implementation.
# FIX: the original comment said "ATE" and stored this ATTE estimate in
# ate_result_doubleml, clobbering the ATE section's variable; renamed so the
# two comparisons stay independent.
atte_result_doubleml = dml_atte_source(causal_data, n_folds=4)

print("Real ATTE = 0.8")
print(f"Estimated with Causalis = {atte_result.get('coefficient')} in {atte_result.get('confidence_interval')}")
print(f"Estimated with DoubleML = {atte_result_doubleml.get('coefficient')} in {atte_result_doubleml.get('confidence_interval')}")
Real ATTE = 0.8
Estimated with Causalis = 0.8238669785039335 in (0.7558326760362799, 0.8919012809715872)
Estimated with DoubleML = 0.7951243102839649 in (0.7340549132666624, 0.8561937073012674)
Conclusion#
As we can see, the Causalis estimates are very close to the DoubleML ones. The nuisance functions were out-of-the-box CatBoost models.