RCT Benchmark: t-test vs Linear Regression vs DML ATE#
import numpy as np
import pandas as pd
from causalis.data.generators import generate_rct
from causalis.data.causaldata import CausalData
from causalis.inference.atte.ttest import ttest
from causalis.inference.ate.dml_ate_source import dml_ate_source
np.random.seed(42)
Generate RCT data#
We’ll generate a balanced (50/50) RCT with a continuous outcome where the treated group’s mean exceeds control by 0.5 units.
# Generate clean RCT without legacy ancillary columns
n = 10000
theta = 0.5
df = generate_rct(
    n=n,
    split=0.5,
    random_state=42,
    target_type="normal",
    target_params={"mean": {"A": 0.0, "B": theta}, "std": 1.0},
    k=5,                     # 5 pre-treatment covariates X, independent of T
    add_ancillary=False,     # no legacy/post-treatment columns
)
# Use only baseline X columns as confounders
confounders = [c for c in df.columns if c.startswith("x")]
# Wrap in CausalData with new column names
causal_data = CausalData(
    df=df,
    treatment='t',
    outcome='y',
    confounders=confounders,
)
causal_data.df.head(100)
| | y | t | x1 | x2 | x3 | x4 | x5 |
|---|---|---|---|---|---|---|---|
| 0 | 0.302852 | 0.0 | 0.304717 | -1.039984 | 0.750451 | 0.940565 | -1.951035 |
| 1 | -0.942206 | 1.0 | -1.302180 | 0.127840 | -0.316243 | -0.016801 | -0.853044 |
| 2 | 0.910524 | 1.0 | 0.879398 | 0.777792 | 0.066031 | 1.127241 | 0.467509 |
| 3 | 2.332408 | 1.0 | -0.859292 | 0.368751 | -0.958883 | 0.878450 | -0.049926 |
| 4 | -1.012732 | 0.0 | -0.184862 | -0.680930 | 1.222541 | -0.154529 | -0.428328 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | 0.305495 | 0.0 | -0.339258 | 1.063852 | -1.141938 | 0.006339 | 2.597674 |
| 96 | -0.865824 | 1.0 | 0.223080 | 1.433215 | 0.091520 | 0.580777 | -0.056783 |
| 97 | -0.027818 | 1.0 | -0.170408 | -0.779482 | 0.430301 | -0.851537 | 0.665585 |
| 98 | -1.263472 | 0.0 | 1.085287 | 0.366531 | -0.286249 | 0.453966 | -0.308673 |
| 99 | -0.421367 | 0.0 | 0.935547 | -1.831406 | -0.335607 | -1.990812 | -1.495061 |
100 rows × 7 columns
Exploratory checks#
We pass a few baseline covariates as confounders for DML (although the data is truly randomized). Before estimating, we use CausalEDA to inspect outcome statistics by treatment and covariate balance.
from causalis.eda import CausalEDA
eda = CausalEDA(causal_data)
# 1) Outcome statistics by treatment
eda.outcome_stats()
| treatment | count | mean | std | min | p10 | p25 | median | p75 | p90 | max |
|---|---|---|---|---|---|---|---|---|---|---|
| 0.0 | 5010 | -0.009797 | 1.005403 | -3.482370 | -1.290654 | -0.684156 | -0.014663 | 0.657298 | 1.286337 | 3.627004 |
| 1.0 | 4990 | 0.519886 | 1.007209 | -3.021571 | -0.746023 | -0.165270 | 0.503424 | 1.206379 | 1.790902 | 3.885919 |
# Shows means of confounders for control/treated groups, absolute differences, and SMD values
confounders_balance_df = eda.confounders_means()
display(confounders_balance_df)
| confounders | mean_t_0 | mean_t_1 | abs_diff | smd |
|---|---|---|---|---|
| x2 | -0.014765 | 0.018033 | 0.032798 | 0.032770 |
| x1 | -0.022735 | -0.008229 | 0.014505 | 0.014486 |
| x5 | 0.007285 | -0.006229 | 0.013514 | -0.013459 |
| x3 | 0.014590 | 0.007731 | 0.006859 | -0.006917 |
| x4 | -0.000891 | -0.000040 | 0.000850 | 0.000838 |
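The SMD column scales each covariate's treated-minus-control mean difference by a measure of spread. Below is a minimal sketch using a pooled standard deviation; the exact scaling used inside CausalEDA is an assumption here and may differ slightly.
# Illustrative SMD for a single covariate, using a pooled standard deviation.
# The exact pooling used by confounders_means() is an assumption.
def smd(x, t):
    x1, x0 = x[t == 1], x[t == 0]
    pooled_sd = np.sqrt((x1.var(ddof=1) + x0.var(ddof=1)) / 2)
    return (x1.mean() - x0.mean()) / pooled_sd

smd(causal_data.df["x2"].to_numpy(), causal_data.df["t"].to_numpy())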
1) t-test (difference in means)#
tt_res = ttest(causal_data, confidence_level=0.95)
tt_res
{'p_value': 1.176719325899924e-147,
'absolute_difference': 0.5296827782315828,
'absolute_ci': (0.49023151024069744, 0.5691340462224682),
'relative_difference': 5406.753548476556,
'relative_ci': (5004.053494844908, 5809.453602108204)}
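The absolute difference is simply the difference of the group means shown in the outcome statistics above (0.5199 minus -0.0098). As a cross-check, here is a minimal sketch with scipy, assuming ttest performs a standard two-sample (Welch-style) comparison; the causalis implementation may differ in its details.
# Illustrative cross-check: difference in means and Welch t-test p-value via scipy
from scipy import stats
y1 = causal_data.df.loc[causal_data.df["t"] == 1, "y"]
y0 = causal_data.df.loc[causal_data.df["t"] == 0, "y"]
y1.mean() - y0.mean(), stats.ttest_ind(y1, y0, equal_var=False).pvalue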
2) Linear regression#
In an RCT, regressing the outcome on treatment alone recovers the difference in group means. Below we fit a fully interacted (Lin-style) specification with treatment, covariates, and treatment-by-covariate interactions, and recover the ATE as beta_T + xbar' * gamma: the treatment coefficient plus the interaction coefficients evaluated at the covariate means, with a delta-method standard error and robust (HC3) covariance.
import numpy as np
import statsmodels.api as sm
# Outcome
y = causal_data.target.to_numpy()
# Base X (no centering)
X_base = causal_data.df[confounders].to_numpy()
xbar = X_base.mean(axis=0) # means of confounders, shape (p,)
# Treatment and interactions
T = causal_data.treatment.to_numpy().reshape(-1, 1)
TX = X_base * T
# Design matrix: intercept + T + X + T*X
X_design = np.column_stack([np.ones(len(T)), T, X_base, TX])
# Fit OLS with robust SE
res = sm.OLS(y, X_design).fit(cov_type="HC3")
# Dimensions and index bookkeeping
p = X_base.shape[1]
idx_const = 0
idx_T = 1
idx_X_start = 2
idx_X_end = idx_X_start + p # exclusive
idx_TX_start = idx_X_end
idx_TX_end = idx_TX_start + p # exclusive
# Parameter vector is [const, beta_T, beta_X (p), gamma_TX (p)]
beta = res.params
V = res.cov_params()
# Average treatment effect under the linear-interaction model:
# theta = beta_T + xbar' * gamma
theta_hat = float(beta[idx_T] + (xbar @ beta[idx_TX_start:idx_TX_end]))
# Delta-method variance: Var(a' beta) = a' V a
a = np.zeros_like(beta)
a[idx_T] = 1.0
a[idx_TX_start:idx_TX_end] = xbar
var_theta = float(a @ V @ a)
se_theta = float(np.sqrt(max(var_theta, 0.0)))
# 95% CI (normal approx)
from scipy.stats import norm
z = norm.ppf(0.975)
ci_low = theta_hat - z * se_theta
ci_high = theta_hat + z * se_theta
# Two-sided p-value for H0: theta = 0
zstat = theta_hat / se_theta if se_theta > 0 else np.inf
pval = 2 * (1 - norm.cdf(abs(zstat)))
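# Note: 1 - norm.cdf(|z|) cancels to exactly 0.0 for |z| this large;
# norm.sf(abs(zstat)) would avoid the cancellation and keep the tiny tail probability.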
theta_hat, (ci_low, ci_high), se_theta, pval
(0.5294584904981685,
(np.float64(0.4899759986996098), np.float64(0.5689409822967272)),
0.02014449862854194,
np.float64(0.0))
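As a sanity check on the opening claim, a regression of the outcome on an intercept and treatment alone reproduces the raw difference in means; a minimal sketch:
# Simple OLS of y on [1, T]: the slope on T equals the treated-minus-control mean difference
X_simple = sm.add_constant(causal_data.treatment.to_numpy())
sm.OLS(y, X_simple).fit(cov_type="HC3").params[1]  # should match tt_res['absolute_difference']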
3) Double Machine Learning (ATE)#
We estimate the ATE with dml_ate_source, which fits a DoubleML interactive regression model (IRM) using its default learners.
dml_res = dml_ate_source(causal_data, n_folds=3, confidence_level=0.95)
dml_res
{'coefficient': 0.5438121651196453,
'std_error': 0.021106462512239948,
'p_value': 2.1780238928091667e-146,
'confidence_interval': (0.5024442587546102, 0.5851800714846803),
'model': <doubleml.irm.irm.DoubleMLIRM at 0x1683801a0>}
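For reference, here is a minimal sketch of the same estimator built directly with the DoubleML package. The random-forest learners below are illustrative assumptions and not necessarily the defaults used by dml_ate_source.
# Illustrative: interactive regression model (IRM) ATE with DoubleML directly
import doubleml as dml
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

dml_data = dml.DoubleMLData(causal_data.df, y_col="y", d_cols="t", x_cols=confounders)
irm = dml.DoubleMLIRM(
    dml_data,
    RandomForestRegressor(random_state=42),   # outcome model E[Y | T, X] (assumed learner)
    RandomForestClassifier(random_state=42),  # propensity model P(T = 1 | X) (assumed learner)
    n_folds=3,
)
irm.fit()
irm.summary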
Compare estimates#
tt_ci_low, tt_ci_high = tt_res['absolute_ci'] # from the t-test
lin_ci_low, lin_ci_high = ci_low, ci_high  # from the delta-method calculation above
dml_ci_low, dml_ci_high = dml_res['confidence_interval'] # from DoubleML
comparison = pd.DataFrame({
    'method': ['t-test', 'linear_regression', 'dml_ate'],
    'estimate': [
        tt_res['absolute_difference'],
        theta_hat,
        dml_res['coefficient'],
    ],
    'ci_lower': [
        tt_ci_low,
        lin_ci_low,
        dml_ci_low,
    ],
    'ci_upper': [
        tt_ci_high,
        lin_ci_high,
        dml_ci_high,
    ],
})
comparison
| | method | estimate | ci_lower | ci_upper |
|---|---|---|---|---|
| 0 | t-test | 0.529683 | 0.490232 | 0.569134 |
| 1 | linear_regression | 0.529458 | 0.489976 | 0.568941 |
| 2 | dml_ate | 0.543812 | 0.502444 | 0.585180 |
Ground truth: theta = 0.5. All three point estimates land close to it; the t-test and regression intervals cover it, while the DML interval (0.502, 0.585) narrowly misses it in this particular draw.
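As a quick programmatic check, we can flag whether each interval covers the true effect:
# Flag whether each 95% CI contains the true effect theta = 0.5
theta_true = 0.5
comparison["covers_truth"] = (comparison["ci_lower"] <= theta_true) & (theta_true <= comparison["ci_upper"])
comparison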