RCT Benchmark: t-test vs Linear Regression vs DML ATE#

import numpy as np
import pandas as pd

from causalis.data.generators import generate_rct
from causalis.data.causaldata import CausalData
from causalis.inference.atte.ttest import ttest
from causalis.inference.ate.dml_ate_source import dml_ate_source

np.random.seed(42)

Generate RCT data#

We’ll generate a balanced (50/50) RCT with a continuous outcome whose treated-group mean exceeds the control mean by 0.5 units, so the true ATE is theta = 0.5.

# Generate clean RCT without legacy ancillary columns
n = 10000
theta = 0.5

df = generate_rct(
    n=n,
    split=0.5,
    random_state=42,
    target_type="normal",
    target_params={"mean": {"A": 0.0, "B": theta}, "std": 1.0},
    k=5,                  # 5 pre-treatment covariates X independent of T
    add_ancillary=False   # <- no legacy/post-treatment columns
)

# Use only baseline X columns as confounders
confounders = [c for c in df.columns if c.startswith("x")]

# Wrap in CausalData, naming the treatment, outcome, and confounder columns
causal_data = CausalData(
    df=df,
    treatment='t',
    outcome='y',
    confounders=confounders
)
causal_data.df.head(100)
y t x1 x2 x3 x4 x5
0 0.302852 0.0 0.304717 -1.039984 0.750451 0.940565 -1.951035
1 -0.942206 1.0 -1.302180 0.127840 -0.316243 -0.016801 -0.853044
2 0.910524 1.0 0.879398 0.777792 0.066031 1.127241 0.467509
3 2.332408 1.0 -0.859292 0.368751 -0.958883 0.878450 -0.049926
4 -1.012732 0.0 -0.184862 -0.680930 1.222541 -0.154529 -0.428328
... ... ... ... ... ... ... ...
95 0.305495 0.0 -0.339258 1.063852 -1.141938 0.006339 2.597674
96 -0.865824 1.0 0.223080 1.433215 0.091520 0.580777 -0.056783
97 -0.027818 1.0 -0.170408 -0.779482 0.430301 -0.851537 0.665585
98 -1.263472 0.0 1.085287 0.366531 -0.286249 0.453966 -0.308673
99 -0.421367 0.0 0.935547 -1.831406 -0.335607 -1.990812 -1.495061

100 rows × 7 columns

Exploratory data analysis#

Before estimating, we inspect outcome summaries by treatment group and check covariate balance. We pass the baseline covariates to DML as confounders, even though the data is truly randomized.

from causalis.eda import CausalEDA
eda = CausalEDA(causal_data)
# 1) Outcome statistics by treatment
eda.outcome_stats()
count mean std min p10 p25 median p75 p90 max
treatment
0.0 5010 -0.009797 1.005403 -3.482370 -1.290654 -0.684156 -0.014663 0.657298 1.286337 3.627004
1.0 4990 0.519886 1.007209 -3.021571 -0.746023 -0.165270 0.503424 1.206379 1.790902 3.885919
# Shows means of confounders for control/treated groups, absolute differences, and SMD values
confounders_balance_df = eda.confounders_means()
display(confounders_balance_df)
mean_t_0 mean_t_1 abs_diff smd
confounders
x2 -0.014765 0.018033 0.032798 0.032770
x1 -0.022735 -0.008229 0.014505 0.014486
x5 0.007285 -0.006229 0.013514 -0.013459
x3 0.014590 0.007731 0.006859 -0.006917
x4 -0.000891 -0.000040 0.000850 0.000838
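
All SMDs are below 0.04, well under the conventional 0.1 imbalance threshold, as expected under randomization. As a sanity check, here is a minimal sketch of the usual pooled-SD definition of the SMD, computed by hand for x2 (causalis's exact formula may differ in detail):

# Sketch: SMD under the pooled-SD definition,
# smd = (mean_1 - mean_0) / sqrt((var_1 + var_0) / 2)
t_arr = causal_data.df['t'].to_numpy()
x_arr = causal_data.df['x2'].to_numpy()
m1, m0 = x_arr[t_arr == 1].mean(), x_arr[t_arr == 0].mean()
pooled_sd = np.sqrt((x_arr[t_arr == 1].var(ddof=1) + x_arr[t_arr == 0].var(ddof=1)) / 2)
print((m1 - m0) / pooled_sd)  # ~0.0328 for x2, matching the smd column above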

1) t-test (difference in means)#

tt_res = ttest(causal_data, confidence_level=0.95)
tt_res
{'p_value': 1.176719325899924e-147,
 'absolute_difference': 0.5296827782315828,
 'absolute_ci': (0.49023151024069744, 0.5691340462224682),
 'relative_difference': 5406.753548476556,
 'relative_ci': (5004.053494844908, 5809.453602108204)}
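
Note that the relative difference (about 5400%) is not informative here: the control-group mean is close to zero, so the ratio explodes; the absolute estimates are what matter. As a cross-check, a minimal sketch of the same comparison with SciPy's Welch t-test (assuming causalis's ttest performs a two-sample comparison of means):

from scipy import stats

y1 = causal_data.df.loc[causal_data.df['t'] == 1, 'y']
y0 = causal_data.df.loc[causal_data.df['t'] == 0, 'y']
welch = stats.ttest_ind(y1, y0, equal_var=False)
print(y1.mean() - y0.mean(), welch.pvalue)  # ~0.5297 and ~1e-147, matching above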

2) Linear regression#

The coefficient on treatment equals the difference in group means in an RCT.
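
A minimal sketch verifying this equivalence on the causal_data object defined above:

import statsmodels.api as sm

y_arr = causal_data.target.to_numpy()
T_arr = causal_data.treatment.to_numpy()
simple = sm.OLS(y_arr, sm.add_constant(T_arr)).fit(cov_type="HC3")
print(simple.params[1])  # ~0.5297, identical to the t-test's absolute_difference

The full example below goes further: it fits a fully interacted model (intercept, T, X, T*X) and recovers the ATE as theta = beta_T + xbar' * gamma via the delta method, together with a robust standard error and confidence interval.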

import numpy as np
import statsmodels.api as sm

# Outcome
y = causal_data.target.to_numpy()

# Base X (no centering)
X_base = causal_data.df[confounders].to_numpy()
xbar = X_base.mean(axis=0)  # means of confounders, shape (p,)

# Treatment and interactions
T = causal_data.treatment.to_numpy().reshape(-1, 1)
TX = X_base * T

# Design matrix: intercept + T + X + T*X
X_design = np.column_stack([np.ones(len(T)), T, X_base, TX])

# Fit OLS with robust SE
res = sm.OLS(y, X_design).fit(cov_type="HC3")

# Dimensions and index bookkeeping
p = X_base.shape[1]
idx_const = 0
idx_T = 1
idx_X_start = 2
idx_X_end = idx_X_start + p       # exclusive
idx_TX_start = idx_X_end
idx_TX_end = idx_TX_start + p     # exclusive

# Parameter vector is [const, beta_T, beta_X (p), gamma_TX (p)]
beta = res.params
V = res.cov_params()

# Average treatment effect under the linear-interaction model:
# theta = beta_T + xbar' * gamma
theta_hat = float(beta[idx_T] + (xbar @ beta[idx_TX_start:idx_TX_end]))

# Delta-method variance: Var(a' beta) = a' V a
a = np.zeros_like(beta)
a[idx_T] = 1.0
a[idx_TX_start:idx_TX_end] = xbar
var_theta = float(a @ V @ a)
se_theta = float(np.sqrt(max(var_theta, 0.0)))

# 95% CI (normal approx)
from scipy.stats import norm
z = norm.ppf(0.975)
ci_low = theta_hat - z * se_theta
ci_high = theta_hat + z * se_theta

# Two-sided p-value for H0: theta = 0
zstat = theta_hat / se_theta if se_theta > 0 else np.inf
pval = 2 * (1 - norm.cdf(abs(zstat)))

theta_hat, (ci_low, ci_high), se_theta, pval
(0.5294584904981685,
 (np.float64(0.4899759986996098), np.float64(0.5689409822967272)),
 0.02014449862854194,
 np.float64(0.0))
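
Because the covariates are balanced by randomization (and carry little outcome signal in this generator), adjustment changes almost nothing: the point estimate (0.5295) and standard error (0.0201) are virtually identical to the t-test's.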

3) Double Machine Learning (ATE)#

We estimate the ATE with DoubleML's interactive regression model (IRM), using the wrapper's default learners.

dml_res = dml_ate_source(causal_data, n_folds=3, confidence_level=0.95)
dml_res
{'coefficient': 0.5438121651196453,
 'std_error': 0.021106462512239948,
 'p_value': 2.1780238928091667e-146,
 'confidence_interval': (0.5024442587546102, 0.5851800714846803),
 'model': <doubleml.irm.irm.DoubleMLIRM at 0x1683801a0>}
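
For reference, here is a minimal sketch of roughly equivalent direct usage of the doubleml package. The random-forest learners are illustrative assumptions; causalis's defaults may differ:

import doubleml as dml
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

dml_data = dml.DoubleMLData(df, y_col='y', d_cols='t', x_cols=confounders)
irm = dml.DoubleMLIRM(
    dml_data,
    ml_g=RandomForestRegressor(random_state=42),   # outcome regressions g(0, X), g(1, X)
    ml_m=RandomForestClassifier(random_state=42),  # propensity model m(X)
    n_folds=3,
    score='ATE',
)
irm.fit()
print(irm.summary)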

Compare estimates#

tt_ci_low, tt_ci_high = tt_res['absolute_ci']              # from the t-test
lin_ci_low, lin_ci_high = ci_low, ci_high                  # from the delta-method calculation above
dml_ci_low, dml_ci_high = dml_res['confidence_interval']   # from DoubleML

comparison = pd.DataFrame({
    'method': ['t-test', 'linear_regression', 'dml_ate'],
    'estimate': [
        tt_res['absolute_difference'],
        theta_hat,
        dml_res['coefficient']
    ],
    'ci_lower': [
        tt_ci_low,
        lin_ci_low,
        dml_ci_low
    ],
    'ci_upper': [
        tt_ci_high,
        lin_ci_high,
        dml_ci_high
    ]
})

comparison
method estimate ci_lower ci_upper
0 t-test 0.529683 0.490232 0.569134
1 linear_regression 0.529458 0.489976 0.568941
2 dml_ate 0.543812 0.502444 0.585180

Ground truth: theta = 0.5. All three point estimates land within 0.05 of the truth, and every 95% confidence interval covers it.
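
A quick programmatic check of CI coverage:

# Every 95% CI should contain the true effect theta = 0.5
covered = (comparison['ci_lower'] <= theta) & (theta <= comparison['ci_upper'])
print(covered.all())  # True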