RCT Benchmark: t-test vs Linear Regression vs DML ATE#

import numpy as np
import pandas as pd

from causalis.data.generators import generate_rct
from causalis.data.causaldata import CausalData
from causalis.inference.atte.ttest import ttest
from causalis.inference.ate.dml_ate_source import dml_ate_source

np.random.seed(42)

Generate RCT data#

We’ll generate a balanced (50/50) RCT with a continuous outcome whose treated-group mean exceeds the control mean by 0.5 units, so the true ATE is theta = 0.5.

# Generate clean RCT without legacy ancillary columns
n = 10000
theta = 0.5

df = generate_rct(
    n=n,
    split=0.5,
    random_state=42,
    target_type="normal",
    target_params={"mean": {"A": 0.0, "B": theta}, "std": 1.0},
    k=5,                  # 5 pre-treatment covariates X independent of T
    add_ancillary=False   # <- no legacy/post-treatment columns
)

# Use only baseline X columns as confounders
confounders = [c for c in df.columns if c.startswith("x")]

# Wrap in CausalData, naming the treatment, outcome, and confounder columns
causal_data = CausalData(
    df=df,
    treatment='t',
    outcome='y',
    confounders=confounders
)
causal_data.df.head(100)
y t x1 x2 x3 x4 x5
0 0.302852 0.0 0.304717 -1.039984 0.750451 0.940565 -1.951035
1 -0.942206 1.0 -1.302180 0.127840 -0.316243 -0.016801 -0.853044
2 0.910524 1.0 0.879398 0.777792 0.066031 1.127241 0.467509
3 2.332408 1.0 -0.859292 0.368751 -0.958883 0.878450 -0.049926
4 -1.012732 0.0 -0.184862 -0.680930 1.222541 -0.154529 -0.428328
... ... ... ... ... ... ... ...
95 0.305495 0.0 -0.339258 1.063852 -1.141938 0.006339 2.597674
96 -0.865824 1.0 0.223080 1.433215 0.091520 0.580777 -0.056783
97 -0.027818 1.0 -0.170408 -0.779482 0.430301 -0.851537 0.665585
98 -1.263472 0.0 1.085287 0.366531 -0.286249 0.453966 -0.308673
99 -0.421367 0.0 0.935547 -1.831406 -0.335607 -1.990812 -1.495061

100 rows × 7 columns

Exploratory data analysis#

Before estimating, we inspect outcome summaries by treatment group and check covariate balance. We pass the baseline covariates to DML as confounders, even though the data is truly randomized.

from causalis.eda import CausalEDA
eda = CausalEDA(causal_data)
# 1) Outcome statistics by treatment
eda.outcome_stats()
count mean std min p10 p25 median p75 p90 max
treatment
0.0 5010 -0.009797 1.005403 -3.482370 -1.290654 -0.684156 -0.014663 0.657298 1.286337 3.627004
1.0 4990 0.519886 1.007209 -3.021571 -0.746023 -0.165270 0.503424 1.206379 1.790902 3.885919
# Shows means of confounders for control/treated groups, absolute differences, and SMD values
confounders_balance_df = eda.confounders_means()
display(confounders_balance_df)
mean_t_0 mean_t_1 abs_diff smd
confounders
x2 -0.014765 0.018033 0.032798 0.032770
x1 -0.022735 -0.008229 0.014505 0.014486
x5 0.007285 -0.006229 0.013514 -0.013459
x3 0.014590 0.007731 0.006859 -0.006917
x4 -0.000891 -0.000040 0.000850 0.000838
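
All SMDs are below 0.04, well under the conventional 0.1 imbalance threshold, as expected under randomization. As a sanity check, here is a minimal sketch of the usual pooled-SD definition of the SMD, computed by hand for x2 (causalis's exact formula may differ in detail):

# Sketch: SMD under the pooled-SD definition,
# smd = (mean_1 - mean_0) / sqrt((var_1 + var_0) / 2)
t_arr = causal_data.df['t'].to_numpy()
x_arr = causal_data.df['x2'].to_numpy()
m1, m0 = x_arr[t_arr == 1].mean(), x_arr[t_arr == 0].mean()
pooled_sd = np.sqrt((x_arr[t_arr == 1].var(ddof=1) + x_arr[t_arr == 0].var(ddof=1)) / 2)
print((m1 - m0) / pooled_sd)  # ~0.0328 for x2, matching the smd column above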

1) t-test (difference in means)#

tt_res = ttest(causal_data, confidence_level=0.95)
tt_res
{'p_value': 1.176719325899924e-147,
 'absolute_difference': 0.5296827782315828,
 'absolute_ci': (0.49023151024069744, 0.5691340462224682),
 'relative_difference': 5406.753548476556,
 'relative_ci': (5004.053494844908, 5809.453602108204)}
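
Note that the relative difference (about 5400%) is not informative here: the control-group mean is close to zero, so the ratio explodes; the absolute estimates are what matter. As a cross-check, a minimal sketch of the same comparison with SciPy's Welch t-test (assuming causalis's ttest performs a two-sample comparison of means):

from scipy import stats

y1 = causal_data.df.loc[causal_data.df['t'] == 1, 'y']
y0 = causal_data.df.loc[causal_data.df['t'] == 0, 'y']
welch = stats.ttest_ind(y1, y0, equal_var=False)
print(y1.mean() - y0.mean(), welch.pvalue)  # ~0.5297 and ~1e-147, matching above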

2) Linear regression#

The coefficient on treatment equals the difference in group means in an RCT.
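
A minimal sketch verifying this equivalence on the causal_data object defined above:

import statsmodels.api as sm

y_arr = causal_data.target.to_numpy()
T_arr = causal_data.treatment.to_numpy()
simple = sm.OLS(y_arr, sm.add_constant(T_arr)).fit(cov_type="HC3")
print(simple.params[1])  # ~0.5297, identical to the t-test's absolute_difference

The full example below goes further: it fits a fully interacted model (intercept, T, X, T*X) and recovers the ATE as theta = beta_T + xbar' * gamma via the delta method, together with a robust standard error and confidence interval.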

import numpy as np
import statsmodels.api as sm

# Outcome
y = causal_data.target.to_numpy()

# Base X (no centering)
X_base = causal_data.df[confounders].to_numpy()
xbar = X_base.mean(axis=0)  # means of confounders, shape (p,)

# Treatment and interactions
T = causal_data.treatment.to_numpy().reshape(-1, 1)
TX = X_base * T

# Design matrix: intercept + T + X + T*X
X_design = np.column_stack([np.ones(len(T)), T, X_base, TX])

# Fit OLS with robust SE
res = sm.OLS(y, X_design).fit(cov_type="HC3")

# Dimensions and index bookkeeping
p = X_base.shape[1]
idx_const = 0
idx_T = 1
idx_X_start = 2
idx_X_end = idx_X_start + p       # exclusive
idx_TX_start = idx_X_end
idx_TX_end = idx_TX_start + p     # exclusive

# Parameter vector is [const, beta_T, beta_X (p), gamma_TX (p)]
beta = res.params
V = res.cov_params()

# Average treatment effect under the linear-interaction model:
# theta = beta_T + xbar' * gamma
theta_hat = float(beta[idx_T] + (xbar @ beta[idx_TX_start:idx_TX_end]))

# Delta-method variance: Var(a' beta) = a' V a
a = np.zeros_like(beta)
a[idx_T] = 1.0
a[idx_TX_start:idx_TX_end] = xbar
var_theta = float(a @ V @ a)
se_theta = float(np.sqrt(max(var_theta, 0.0)))

# 95% CI (normal approx)
from scipy.stats import norm
z = norm.ppf(0.975)
ci_low = theta_hat - z * se_theta
ci_high = theta_hat + z * se_theta

# Two-sided p-value for H0: theta = 0
zstat = theta_hat / se_theta if se_theta > 0 else np.inf
pval = 2 * (1 - norm.cdf(abs(zstat)))

theta_hat, (ci_low, ci_high), se_theta, pval
(0.5294584904981685,
 (np.float64(0.4899759986996098), np.float64(0.5689409822967272)),
 0.02014449862854194,
 np.float64(0.0))
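
Because the covariates are balanced by randomization (and carry little outcome signal in this generator), adjustment changes almost nothing: the point estimate (0.5295) and standard error (0.0201) are virtually identical to the t-test's.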

3) Double Machine Learning (ATE)#

We estimate the ATE with DoubleML's interactive regression model (IRM), using the wrapper's default learners.

dml_res = dml_ate_source(causal_data, n_folds=3, confidence_level=0.95)
dml_res
{'coefficient': 0.5438121651196453,
 'std_error': 0.021106462512239948,
 'p_value': 2.1780238928091667e-146,
 'confidence_interval': (0.5024442587546102, 0.5851800714846803),
 'model': <doubleml.irm.irm.DoubleMLIRM at 0x1683801a0>}
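
For reference, here is a minimal sketch of roughly equivalent direct usage of the doubleml package. The random-forest learners are illustrative assumptions; causalis's defaults may differ:

import doubleml as dml
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

dml_data = dml.DoubleMLData(df, y_col='y', d_cols='t', x_cols=confounders)
irm = dml.DoubleMLIRM(
    dml_data,
    ml_g=RandomForestRegressor(random_state=42),   # outcome regressions g(0, X), g(1, X)
    ml_m=RandomForestClassifier(random_state=42),  # propensity model m(X)
    n_folds=3,
    score='ATE',
)
irm.fit()
print(irm.summary)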

Compare estimates#

tt_ci_low, tt_ci_high = tt_res['absolute_ci']              # from the t-test
lin_ci_low, lin_ci_high = ci_low, ci_high                  # from the delta-method calculation above
dml_ci_low, dml_ci_high = dml_res['confidence_interval']   # from DoubleML

comparison = pd.DataFrame({
    'method': ['t-test', 'linear_regression', 'dml_ate'],
    'estimate': [
        tt_res['absolute_difference'],
        theta_hat,
        dml_res['coefficient']
    ],
    'ci_lower': [
        tt_ci_low,
        lin_ci_low,
        dml_ci_low
    ],
    'ci_upper': [
        tt_ci_high,
        lin_ci_high,
        dml_ci_high
    ]
})

comparison
method estimate ci_lower ci_upper
0 t-test 0.529683 0.490232 0.569134
1 linear_regression 0.529458 0.489976 0.568941
2 dml_ate 0.543812 0.502444 0.585180

Ground truth: theta = 0.5. All three point estimates land within 0.05 of the truth, and every 95% confidence interval covers it.
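
A quick programmatic check of CI coverage:

# Every 95% CI should contain the true effect theta = 0.5
covered = (comparison['ci_lower'] <= theta) & (theta <= comparison['ci_upper'])
print(covered.all())  # True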