Loading...
Loading...
Bayesian modeling with PyMC. Build hierarchical models, MCMC (NUTS), variational inference, LOO/WAIC comparison, posterior checks, for probabilistic programming and inference.
npx skill4agent add davila7/claude-code-templates pymc-bayesian-modelingimport pymc as pm
import arviz as az
import numpy as np
# Load and prepare data
X = ... # Predictors
y = ... # Outcomes
# Standardize predictors for better sampling
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_scaled = (X - X_mean) / X_stdcoordscoords = {
'predictors': ['var1', 'var2', 'var3'],
'obs_id': np.arange(len(y))
}
with pm.Model(coords=coords) as model:
# Priors
alpha = pm.Normal('alpha', mu=0, sigma=1)
beta = pm.Normal('beta', mu=0, sigma=1, dims='predictors')
sigma = pm.HalfNormal('sigma', sigma=1)
# Linear predictor
mu = alpha + pm.math.dot(X_scaled, beta)
# Likelihood
y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y, dims='obs_id')HalfNormalExponentialdimsshapepm.Data()with model:
prior_pred = pm.sample_prior_predictive(samples=1000, random_seed=42)
# Visualize
az.plot_ppc(prior_pred, group='prior')with model:
# Optional: Quick exploration with ADVI
# approx = pm.fit(n=20000)
# Full MCMC inference
idata = pm.sample(
draws=2000,
tune=1000,
chains=4,
target_accept=0.9,
random_seed=42,
idata_kwargs={'log_likelihood': True} # For model comparison
)draws=2000tune=1000chains=4target_accept=0.9log_likelihood=Truefrom scripts.model_diagnostics import check_diagnostics
results = check_diagnostics(idata, var_names=['alpha', 'beta', 'sigma'])target_accept=0.95with model:
pm.sample_posterior_predictive(idata, extend_inferencedata=True, random_seed=42)
# Visualize
az.plot_ppc(idata)# Summary statistics
print(az.summary(idata, var_names=['alpha', 'beta', 'sigma']))
# Posterior distributions
az.plot_posterior(idata, var_names=['alpha', 'beta', 'sigma'])
# Coefficient estimates
az.plot_forest(idata, var_names=['beta'], combined=True)X_new = ... # New predictor values
X_new_scaled = (X_new - X_mean) / X_std
with model:
pm.set_data({'X_scaled': X_new_scaled})
post_pred = pm.sample_posterior_predictive(
idata.posterior,
var_names=['y_obs'],
random_seed=42
)
# Extract prediction intervals
y_pred_mean = post_pred.posterior_predictive['y_obs'].mean(dim=['chain', 'draw'])
y_pred_hdi = az.hdi(post_pred.posterior_predictive, var_names=['y_obs'])with pm.Model() as linear_model:
alpha = pm.Normal('alpha', mu=0, sigma=10)
beta = pm.Normal('beta', mu=0, sigma=10, shape=n_predictors)
sigma = pm.HalfNormal('sigma', sigma=1)
mu = alpha + pm.math.dot(X, beta)
y = pm.Normal('y', mu=mu, sigma=sigma, observed=y_obs)assets/linear_regression_template.pywith pm.Model() as logistic_model:
alpha = pm.Normal('alpha', mu=0, sigma=10)
beta = pm.Normal('beta', mu=0, sigma=10, shape=n_predictors)
logit_p = alpha + pm.math.dot(X, beta)
y = pm.Bernoulli('y', logit_p=logit_p, observed=y_obs)with pm.Model(coords={'groups': group_names}) as hierarchical_model:
# Hyperpriors
mu_alpha = pm.Normal('mu_alpha', mu=0, sigma=10)
sigma_alpha = pm.HalfNormal('sigma_alpha', sigma=1)
# Group-level (non-centered)
alpha_offset = pm.Normal('alpha_offset', mu=0, sigma=1, dims='groups')
alpha = pm.Deterministic('alpha', mu_alpha + sigma_alpha * alpha_offset, dims='groups')
# Observation-level
mu = alpha[group_idx]
sigma = pm.HalfNormal('sigma', sigma=1)
y = pm.Normal('y', mu=mu, sigma=sigma, observed=y_obs)assets/hierarchical_model_template.pywith pm.Model() as poisson_model:
alpha = pm.Normal('alpha', mu=0, sigma=10)
beta = pm.Normal('beta', mu=0, sigma=10, shape=n_predictors)
log_lambda = alpha + pm.math.dot(X, beta)
y = pm.Poisson('y', mu=pm.math.exp(log_lambda), observed=y_obs)NegativeBinomialwith pm.Model() as ar_model:
sigma = pm.HalfNormal('sigma', sigma=1)
rho = pm.Normal('rho', mu=0, sigma=0.5, shape=ar_order)
init_dist = pm.Normal.dist(mu=0, sigma=sigma)
y = pm.AR('y', rho=rho, sigma=sigma, init_dist=init_dist, observed=y_obs)from scripts.model_comparison import compare_models, check_loo_reliability
# Fit models with log_likelihood
models = {
'Model1': idata1,
'Model2': idata2,
'Model3': idata3
}
# Compare using LOO
comparison = compare_models(models, ic='loo')
# Check reliability
check_loo_reliability(models)from scripts.model_comparison import model_averaging
averaged_pred, weights = model_averaging(models, var_name='y_obs')pm.HalfNormal('sigma', sigma=1)pm.Exponential('sigma', lam=1)pm.Gamma('sigma', alpha=2, beta=1)pm.Normal('theta', mu=0, sigma=1)pm.StudentT('theta', nu=3, mu=0, sigma=1)pm.LogNormal('theta', mu=0, sigma=1)pm.Gamma('theta', alpha=2, beta=1)pm.Beta('p', alpha=2, beta=2)pm.Uniform('p', lower=0, upper=1)pm.LKJCorr('corr', n=n_vars, eta=2)pm.Normal('y', mu=mu, sigma=sigma)pm.StudentT('y', nu=nu, mu=mu, sigma=sigma)pm.Poisson('y', mu=lambda)pm.NegativeBinomial('y', mu=mu, alpha=alpha)pm.ZeroInflatedPoisson('y', psi=psi, mu=mu)pm.Bernoulli('y', p=p)pm.Bernoulli('y', logit_p=logit_p)pm.Categorical('y', p=probs)references/distributions.mdidata = pm.sample(
draws=2000,
tune=1000,
chains=4,
target_accept=0.9,
random_seed=42
)target_accept=0.95pm.Metropolis()with model:
approx = pm.fit(n=20000, method='advi')
# Use for initialization
start = approx.sample(return_inferencedata=False)[0]
idata = pm.sample(start=start)references/sampling_inference.mdfrom scripts.model_diagnostics import create_diagnostic_report
create_diagnostic_report(
idata,
var_names=['alpha', 'beta', 'sigma'],
output_dir='diagnostics/'
)from scripts.model_diagnostics import check_diagnostics
results = check_diagnostics(idata)idata.sample_stats.diverging.sum() > 0target_accept=0.950.99ESS < 400draws=5000R-hat > 1.01tune=2000, draws=5000cores=8, chains=8dimstarget_accept=0.9log_likelihood=Truereferences/distributions.mdsampling_inference.mdworkflows.mdscripts/model_diagnostics.pycheck_diagnostics()create_diagnostic_report()model_comparison.pycompare_models()check_loo_reliability()model_averaging()assets/linear_regression_template.pyhierarchical_model_template.pywith pm.Model(coords={'var': names}) as model:
# Priors
param = pm.Normal('param', mu=0, sigma=1, dims='var')
# Likelihood
y = pm.Normal('y', mu=..., sigma=..., observed=data)idata = pm.sample(draws=2000, tune=1000, chains=4, target_accept=0.9)from scripts.model_diagnostics import check_diagnostics
check_diagnostics(idata)from scripts.model_comparison import compare_models
compare_models({'m1': idata1, 'm2': idata2}, ic='loo')with model:
pm.set_data({'X': X_new})
pred = pm.sample_posterior_predictive(idata.posterior)pm.model_to_graphviz(model)idata.to_netcdf('results.nc')az.from_netcdf('results.nc')