9. Prediction and Bayesian inference¶
Bayesian inference has three steps:
- We have a model (e.g., normal or linear growth) of how the data are generated and the data themselves. We combine them to figure out which parameter values are most likely to have generated the data. End up with a posterior distribution of parameter values - values that could reasonably have generated the data and their likelihoods.
- From the posterior distribution, we can make predictions about future data and their uncertainties.
- Incorporate prior information to make better predictions.
9.1 Propagating uncertainty in inference using posterior simulations¶
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import arviz as az
import pymc as pm
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.robust.scale import mad
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/arviz/__init__.py:50: FutureWarning: ArviZ is undergoing a major refactor to improve flexibility and extensibility while maintaining a user-friendly interface. Some upcoming changes may be backward incompatible. For details and migration guidance, visit: https://python.arviz.org/en/latest/user_guide/migration_guide.html warn(
def fit_and_plot_lm(data, predictors, outcome, add_constant=True, show_plot=True, scatter_kws=None, line_kws=None):
    """
    Fit a linear model using statsmodels, print summary, plot, and show formula.

    Args:
        data: pandas DataFrame
        predictors: list of predictor column names (str)
        outcome: outcome column name (str)
        add_constant: whether to add intercept (default True)
        show_plot: whether to plot (default True; drawn only for a single predictor)
        scatter_kws: dict, kwargs for scatterplot
        line_kws: dict, kwargs for regression line
    """
    X = data[predictors].copy()
    if add_constant:
        # prepend=False keeps the constant as the last column of the design matrix
        X = sm.add_constant(X, prepend=False)
    y = data[outcome]
    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())

    # Build the fitted-formula string once (the original built it twice when
    # add_constant was True, discarding the first version).
    params = results.params
    terms = " + ".join([f"{params[name]:.2f}*{name}" for name in predictors])
    if add_constant:
        formula = f"{outcome} = {params['const']:.2f} + " + terms
    else:
        formula = f"{outcome} = " + terms
    print("Formula:", formula)

    # Residual std dev and its approximate standard error: se(sigma) ≈ sigma / sqrt(2*df)
    sigma = np.sqrt(results.mse_resid)
    sigma_se = sigma / np.sqrt(2 * results.df_resid)
    print(f"Residual std dev (σ): {sigma:.2f} ± {sigma_se:.2f}")

    # Median absolute deviation of residuals: robust spread measure
    print("MAD of residuals:", round(mad(results.resid), 2))

    # Scatter + fitted line, only meaningful for simple (one-predictor) regression
    if show_plot and len(predictors) == 1:
        x_name = predictors[0]
        ax = sns.scatterplot(data=data, x=x_name, y=outcome, **(scatter_kws or {}))
        x_vals = np.linspace(data[x_name].min(), data[x_name].max(), 100)
        y_vals = params[x_name] * x_vals
        if add_constant:
            y_vals = y_vals + params['const']
        ax.plot(x_vals, y_vals, color='red', **(line_kws or {}))
        ax.set_title('Linear Regression Fit')
        plt.show()
def fit_and_plot_bayes(data, predictor, outcome,
                       intercept_mu=0, intercept_sigma=50,
                       slope_mu=0, slope_sigma=50,
                       sigma_sigma=50,
                       samples=2000, tune=1000, hdi_prob=0.95,
                       show_trace=True, show_forest=True,
                       show_posterior=True, show_regression=True,
                       n_regression_lines=100):
    """
    Fit a Bayesian simple linear regression with PyMC and optionally plot diagnostics.

    The model is: y ~ Normal(intercept + slope * x, sigma), with Normal priors on
    intercept and slope and a HalfNormal prior on sigma.

    Args:
        data: pandas DataFrame
        predictor: str, name of the predictor column
        outcome: str, name of the outcome column
        intercept_mu, intercept_sigma: prior mean and std for intercept ~ Normal
        slope_mu, slope_sigma: prior mean and std for slope ~ Normal
        sigma_sigma: prior std for residual noise ~ HalfNormal
        samples: number of posterior draws
        tune: number of tuning steps
        hdi_prob: HDI probability for summaries and plots
        show_trace: plot trace and posterior density per parameter
        show_forest: plot forest (posterior means + HDI)
        show_posterior: plot posterior densities
        show_regression: plot data with posterior regression lines
        n_regression_lines: number of posterior draws to overlay on regression plot

    Returns:
        trace: PyMC InferenceData object
    """
    x_obs = data[predictor].values
    y_obs = data[outcome].values

    # Define the model and draw posterior samples with NUTS.
    with pm.Model() as model:
        intercept = pm.Normal("intercept", mu=intercept_mu, sigma=intercept_sigma)
        slope = pm.Normal("slope", mu=slope_mu, sigma=slope_sigma)
        sigma = pm.HalfNormal("sigma", sigma=sigma_sigma)
        mu = intercept + slope * x_obs
        pm.Normal("y", mu=mu, sigma=sigma, observed=y_obs)
        trace = pm.sample(samples, tune=tune)

    print(pm.summary(trace, hdi_prob=hdi_prob))

    if show_trace:
        az.plot_trace(trace)
        plt.tight_layout()
        plt.show()

    if show_forest:
        az.plot_forest(trace, hdi_prob=hdi_prob)
        plt.show()

    if show_posterior:
        az.plot_posterior(trace, hdi_prob=hdi_prob)
        plt.show()

    if show_regression:
        # Overlay a random subset of posterior regression lines on the data,
        # plus the posterior-mean line in red.
        draws = trace.posterior
        a_draws = draws["intercept"].values.flatten()
        b_draws = draws["slope"].values.flatten()
        plt.scatter(x_obs, y_obs)
        grid = np.linspace(x_obs.min(), x_obs.max(), 100)
        chosen = np.random.choice(len(a_draws), n_regression_lines, replace=False)
        for j in chosen:
            plt.plot(grid, a_draws[j] + b_draws[j] * grid, alpha=0.05, color="gray")
        plt.plot(grid, a_draws.mean() + b_draws.mean() * grid, color="red")
        plt.xlabel(predictor)
        plt.ylabel(outcome)
        plt.title("Bayesian regression with posterior uncertainty")
        plt.show()

    return trace
# Import hibbs.dat from /ros_data (whitespace-separated columns).
# Use a raw string for the regex separator: the plain '\s+' literal triggered
# "SyntaxWarning: invalid escape sequence '\s'".
hibbs = pd.read_csv('../ros_data/hibbs.dat', sep=r'\s+')

# Display the first few rows of the dataset
display(hibbs.head(10))

# Classical OLS fit of vote share on economic growth
fit_and_plot_lm(data=hibbs, predictors=['growth'], outcome='vote',
                add_constant=True, show_plot=True, scatter_kws=None, line_kws=None)
# Bayesian linear regression with PyMC.
# Same model that fit_and_plot_bayes wraps: weakly informative Normal priors on
# intercept and slope, HalfNormal prior on the residual standard deviation.
x = hibbs['growth'].values
y = hibbs['vote'].values
with pm.Model() as model:  # Create a new PyMC model context to define the Bayesian model
    intercept = pm.Normal("intercept", mu=0, sigma=50)  # Normal prior for the intercept (mean 0, std 50)
    slope = pm.Normal("slope", mu=0, sigma=50)  # Normal prior for the slope (mean 0, std 50)
    sigma = pm.HalfNormal("sigma", sigma=50)  # HalfNormal prior for the noise std (constrained positive)
    mu = intercept + slope * x  # Expected value of y as a linear function of x
    likelihood = pm.Normal("y", mu=mu, sigma=sigma, observed=y)  # Likelihood: observed y ~ Normal(mu, sigma)
    trace = pm.sample(2000, tune=1000)  # Run MCMC: 2000 posterior draws per chain after 1000 tuning steps
print(pm.summary(trace, hdi_prob=0.95))  # Print posterior summary with 95% HDI (highest density interval)
# Plot the posterior distributions and trace
az.plot_trace(trace)
plt.tight_layout()
plt.show()
# Forest plot: shows posterior means and HDI intervals
az.plot_forest(trace, hdi_prob=0.95)
# Posterior plot: shows density of each parameter
az.plot_posterior(trace, hdi_prob=0.95)
plt.show()
# Plot regression line with posterior uncertainty
posterior = trace.posterior
a_samples = posterior["intercept"].values.flatten()  # flatten (chain, draw) -> 1-D array of draws
b_samples = posterior["slope"].values.flatten()
plt.scatter(x, y)
x_grid = np.linspace(x.min(), x.max(), 100)
# Overlay 100 posterior regression lines to visualize uncertainty in the fitted line
for i in np.random.choice(len(a_samples), 100,
                          replace=False):
    plt.plot(x_grid, a_samples[i] + b_samples[i] *
             x_grid, alpha=0.05, color="gray")
# Posterior-mean regression line in red
plt.plot(x_grid, a_samples.mean() +
         b_samples.mean() * x_grid, color="red")
plt.xlabel("growth")
plt.ylabel("vote")
plt.title("Bayesian regression with posterior uncertainty")
plt.show()
<>:3: SyntaxWarning: invalid escape sequence '\s' <>:3: SyntaxWarning: invalid escape sequence '\s' /var/folders/6m/c2f6bq0j2fbclrsw82pn99380000gn/T/ipykernel_44536/254143673.py:3: SyntaxWarning: invalid escape sequence '\s' sep='\s+')
| year | growth | vote | inc_party_candidate | other_candidate | |
|---|---|---|---|---|---|
| 0 | 1952 | 2.40 | 44.60 | Stevenson | Eisenhower |
| 1 | 1956 | 2.89 | 57.76 | Eisenhower | Stevenson |
| 2 | 1960 | 0.85 | 49.91 | Nixon | Kennedy |
| 3 | 1964 | 4.21 | 61.34 | Johnson | Goldwater |
| 4 | 1968 | 3.02 | 49.60 | Humphrey | Nixon |
| 5 | 1972 | 3.62 | 61.79 | Nixon | McGovern |
| 6 | 1976 | 1.08 | 48.95 | Ford | Carter |
| 7 | 1980 | -0.39 | 44.70 | Carter | Reagan |
| 8 | 1984 | 3.86 | 59.17 | Reagan | Mondale |
| 9 | 1988 | 2.27 | 53.94 | Bush, Sr. | Dukakis |
OLS Regression Results
==============================================================================
Dep. Variable: vote R-squared: 0.580
Model: OLS Adj. R-squared: 0.550
Method: Least Squares F-statistic: 19.32
Date: Tue, 31 Mar 2026 Prob (F-statistic): 0.000610
Time: 07:55:45 Log-Likelihood: -42.839
No. Observations: 16 AIC: 89.68
Df Residuals: 14 BIC: 91.22
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
growth 3.0605 0.696 4.396 0.001 1.567 4.554
const 46.2476 1.622 28.514 0.000 42.769 49.726
==============================================================================
Omnibus: 5.392 Durbin-Watson: 2.379
Prob(Omnibus): 0.067 Jarque-Bera (JB): 2.828
Skew: -0.961 Prob(JB): 0.243
Kurtosis: 3.738 Cond. No. 4.54
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Formula: vote = 46.25 + 3.06*growth
Residual std dev (σ): 3.76 ± 0.71
MAD of residuals: 2.27
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 46.233 1.858 42.250 49.720 0.033 0.028 3150.0
slope 3.069 0.791 1.550 4.695 0.014 0.013 3114.0
sigma 4.138 0.930 2.624 5.969 0.017 0.024 2686.0
ess_tail r_hat
intercept 3200.0 1.0
slope 3351.0 1.0
sigma 2134.0 1.0
# Using the matrix of posterior simulations to express uncertainty about a
# parameter estimate or a function of parameter estimates.
a = posterior["intercept"].values.flatten()  # 1-D array of posterior intercept draws
b = posterior["slope"].values.flatten()  # 1-D array of posterior slope draws
print("First 5 samples of intercept (a):", a[:5])
print("Length of a samples:", len(a))
print("First 5 samples of slope (b):", b[:5])
print("Length of b samples:", len(b))
# Compute the ratio of intercept to slope for each posterior sample — an example
# of propagating posterior uncertainty through an arbitrary function of parameters
z = a / b
print(f"median: {np.median(z):.2f}, MAD:{mad(z):.2f}")
First 5 samples of intercept (a): [46.33711389 46.33711389 46.33711389 46.33711389 45.40322975] Length of a samples: 8000 First 5 samples of slope (b): [3.00528213 3.00528213 3.00528213 3.00528213 3.47101783] Length of b samples: 8000 median: 15.08, MAD:4.07
9.2 Prediction and uncertainty: predict, posterior_linpred, and posterior_predict¶
After fitting a regression $y = a + b x + \epsilon$ to data, we can use it to predict a new data point or set of data points with predictors $x_{new}$. We can do this in three ways:
- Point prediction, $\hat{y} = \hat{a} + \hat{b} x_{new}$, where $\hat{a}$ and $\hat{b}$ are the mean of the posterior distribution of $a$ and $b$. This is a single value prediction. This ignores the uncertainty in the parameter estimates and the variability in the data.
- Linear prediction with uncertainty, $\hat{y} = a + b x_{new}$, where $a$ and $b$ are drawn from the posterior distribution. This gives us a distribution of predicted values that incorporates the uncertainty in the parameter estimates but not the variability in the data.
- The predictive distribution for a new observation, $\hat{y} = a + b x_{new} + \epsilon$, where $a$ and $b$ are drawn from the posterior distribution and $\epsilon$ is drawn from the distribution of the residuals. This gives us a distribution of predicted values that incorporates both the uncertainty in the parameter estimates and the variability in the data.
Example: blood pressure y is predicted from dose x of a drug.
Point prediction is a single best guess at a given dose, no uncertainty. E.g., at dose x, the average blood pressure is 120 mmHg. Linear prediction is the same, but with uncertainty. E.g., at dose x, the average blood pressure is between 110 and 130 mmHg. Predictive distribution is predicting a specific person's blood pressure at a given dose. E.g., at dose x, a person's blood pressure is between 100 and 140 mmHg.
With lots of data the uncertainty in the linear prediction will be small, but the uncertainty in the predictive distribution will still be large because of the variability in the data. With little data, both uncertainties will be large.
Point prediction¶
# --- Point prediction ---
# Single best guess for the incumbent vote share at 2.0% growth, using the
# posterior means of intercept and slope as point estimates. Ignores both
# parameter uncertainty and residual variability.
new_growth = 2.0  # hypothetical new value of economic growth
a_hat = np.mean(a)  # posterior mean of the intercept
b_hat = np.mean(b)  # posterior mean of the slope
y_point_pred = b_hat * new_growth + a_hat  # point prediction on the fitted line
print(f"Point prediction: {a_hat:.1f} + {b_hat:.1f} * {new_growth} = {y_point_pred:.1f}")
Point prediction: 46.2 + 3.1 * 2.0 = 52.4
Linear predictor with uncertainty¶
Uses the full posterior distribution of $a$ and $b$ to compute $\hat{y} = a + b x_{new}$ for each posterior draw. This gives a distribution of predicted values that captures uncertainty in the estimated regression line, but not individual-level variability.
Linear prediction with uncertainty¶
# Linear prediction with uncertainty: y_hat = a + b * x_new for every posterior draw.
# Captures uncertainty in the fitted line but NOT individual-level variability
# (no residual noise is added here).
a = posterior["intercept"].values.flatten()
b = posterior["slope"].values.flatten()
y_pred_samples = a + b * new_growth  # one predicted y per posterior draw of (a, b)
# Sanity checks on the prediction draws
print("First 5 predicted y samples:", y_pred_samples[:5])
print("Length of predicted y samples:", len(y_pred_samples))
print(f"Prediction with uncertainty: median={np.median(y_pred_samples):.1f}, MAD={mad(y_pred_samples):.1f}")
# Histogram of the linear-predictor draws.
plt.hist(y_pred_samples, bins=30, density=True, alpha=0.7)
plt.xlabel("Predicted vote share")
plt.ylabel("Density")
# Title fixed: this is the distribution of the linear predictor, not the full
# posterior predictive distribution (which would add residual noise).
plt.title("Linear predictor distribution for new growth=2.0%")
plt.show()
First 5 predicted y samples: [52.34767815 52.34767815 52.34767815 52.34767815 52.34526541] Length of predicted y samples: 8000 Prediction with uncertainty: median=52.4, MAD=1.0
Predictive distribution for a new observation using posterior_predict¶
# --- Predictive distribution for a new observation (posterior_predict) ---
# Adds residual noise (sigma) to the linear predictor to account for individual-level variability.
# This is the full predictive distribution: y_pred = a + b * x_new + epsilon
sigma_samples = trace.posterior["sigma"].values.flatten()  # Posterior samples of residual std dev
n_sims = len(a)  # Number of posterior draws
y_pred = a + b * new_growth + np.random.normal(0, sigma_samples, n_sims)  # One noise draw ~ N(0, sigma_i) per posterior draw i
# Summarize the predictive distribution
y_pred_median = np.median(y_pred)  # Median of predictive distribution
y_pred_mad = mad(y_pred)  # MAD: robust measure of spread
# Win probability: the fraction of simulated elections where the incumbent gets > 50% of the vote.
# y_pred > 50 produces a boolean array; np.mean() treats True as 1 and False as 0,
# so the mean IS the proportion (e.g., 5600/8000 = 0.70). This works because each
# simulation already incorporates both parameter uncertainty AND residual variability,
# so the proportion directly estimates the probability.
win_prob = np.mean(y_pred > 50)
print(f"Predicted incumbent vote share: {y_pred_median:.1f}, with s.e. {y_pred_mad:.1f}")
print(f"Pr(incumbent win) = {win_prob:.2f}")
# Plot the predictive distribution
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(y_pred, bins=50, density=True, alpha=0.7, color='steelblue', edgecolor='white')
ax.axvline(50, color='black', linestyle='--', label='50% threshold')  # Win/lose boundary
ax.axvline(y_pred_median, color='red', linestyle='--', label=f'Median: {y_pred_median:.1f}')
ax.set_xlabel('Predicted vote share (%)')
ax.set_ylabel('Density')
ax.set_title(f'Predictive distribution at growth = {new_growth}%')
ax.legend()
plt.show()
Predicted incumbent vote share: 52.4, with s.e. 4.1 Pr(incumbent win) = 0.72
Prediction given a range of input values¶
# Create a grid of growth values from -2.0 to 4.0 in steps of 0.5
new_grid = np.arange(-2.0, 4.5, 0.5)
# Point predictions: single best guess (posterior mean) for each growth value
y_point_pred_grid = a.mean() + b.mean() * new_grid
# Linear predictor with uncertainty: posterior draws of a + b*x for each growth value.
# Broadcasting (n_sims, 1) against (1, n_grid) gives an (n_sims x n_grid) matrix.
y_linpred_grid = a[:, None] + b[:, None] * new_grid[None, :]
# Full predictive distribution: adds residual noise (one sigma draw per row,
# broadcast across the grid) to each linear-predictor draw
y_pred_grid = y_linpred_grid + np.random.normal(0, sigma_samples[:, None], y_linpred_grid.shape)
print("First 5 point predictions for grid:", y_point_pred_grid[:5])
print("Shape of linear predictor grid:", y_linpred_grid.shape)
print("Shape of full predictive grid:", y_pred_grid.shape)
# Summary table: note the MAD of the full predictive distribution stays large
# everywhere, while the linear-predictor MAD narrows near the center of the data
display(pd.DataFrame({
    'growth': new_grid,
    'point_pred': y_point_pred_grid,
    'linpred_median': np.median(y_linpred_grid, axis=0),
    'linpred_mad': mad(y_linpred_grid, axis=0),
    'pred_median': np.median(y_pred_grid, axis=0),
    'pred_mad': mad(y_pred_grid, axis=0)
}))
First 5 point predictions for grid: [40.09547055 41.62978762 43.16410469 44.69842176 46.23273884] Shape of linear predictor grid: (8000, 13) Shape of full predictive grid: (8000, 13)
| growth | point_pred | linpred_median | linpred_mad | pred_median | pred_mad | |
|---|---|---|---|---|---|---|
| 0 | -2.0 | 40.095471 | 40.098814 | 3.143626 | 40.151361 | 5.153935 |
| 1 | -1.5 | 41.629788 | 41.645276 | 2.750526 | 41.702306 | 4.955122 |
| 2 | -1.0 | 43.164105 | 43.179480 | 2.394542 | 43.215047 | 4.705621 |
| 3 | -0.5 | 44.698422 | 44.704079 | 2.084559 | 44.723523 | 4.466381 |
| 4 | 0.0 | 46.232739 | 46.242357 | 1.764202 | 46.313202 | 4.382266 |
| 5 | 0.5 | 47.767056 | 47.790697 | 1.476641 | 47.768615 | 4.302465 |
| 6 | 1.0 | 49.301373 | 49.305630 | 1.231637 | 49.326583 | 4.205838 |
| 7 | 1.5 | 50.835690 | 50.843929 | 1.047895 | 50.843407 | 4.129710 |
| 8 | 2.0 | 52.370007 | 52.371053 | 1.000356 | 52.466981 | 4.161060 |
| 9 | 2.5 | 53.904324 | 53.915128 | 1.102440 | 53.860832 | 4.264261 |
| 10 | 3.0 | 55.438641 | 55.445861 | 1.274693 | 55.359260 | 4.121965 |
| 11 | 3.5 | 56.972958 | 56.972529 | 1.515940 | 56.920709 | 4.172369 |
| 12 | 4.0 | 58.507275 | 58.512106 | 1.799375 | 58.560900 | 4.438528 |
Propagating uncertainty¶
If we have a best estimate of 2% growth, but uncertainty with a normal distribution with standard deviation of 0.3%
# Propagating uncertainty in the input: growth itself is uncertain, modeled as
# N(2, 0.3), so draw one simulated growth value per posterior draw.
x_new = np.random.normal(2, 0.3, size=n_sims)
y_pred = a + b * x_new + np.random.normal(0, sigma_samples, n_sims)  # add residual noise ~ N(0, sigma) per draw
# Summarize the predictive distribution
y_pred_median = np.median(y_pred)  # median of predictive distribution
y_pred_mad = mad(y_pred)  # MAD: robust measure of spread
# Win probability: fraction of simulated elections where the incumbent gets > 50%.
# Each simulation incorporates parameter uncertainty, residual variability, AND
# uncertainty about the growth value itself, so the proportion of True values
# (np.mean of the boolean array) directly estimates the probability.
win_prob = np.mean(y_pred > 50)
print(f"Predicted incumbent vote share: {y_pred_median:.1f}, with s.e. {y_pred_mad:.1f}")
print(f"Pr(incumbent win) = {win_prob:.2f}")
# Plot the predictive distribution
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(y_pred, bins=50, density=True, alpha=0.7, color='steelblue', edgecolor='white')
ax.axvline(50, color='black', linestyle='--', label='50% threshold')  # win/lose boundary
ax.axvline(y_pred_median, color='red', linestyle='--', label=f'Median: {y_pred_median:.1f}')
ax.set_xlabel('Predicted vote share (%)')
ax.set_ylabel('Density')
# Title fixed: growth is drawn from N(2, 0.3) here, not fixed at new_growth
ax.set_title('Predictive distribution with growth ~ N(2, 0.3)%')
ax.legend()
plt.show()
Predicted incumbent vote share: 52.4, with s.e. 4.2 Pr(incumbent win) = 0.71
Simulating uncertainty for the linear predictor and new observations¶
# Load the earnings data (the previous skiprows=0 was a no-op and is removed)
earnings = pd.read_csv('../ros_data/earnings.csv')
earnings['earn_k'] = earnings['earn'] / 1000
# Center height at 66 inches so the intercept is the expected weight of a
# 66-inch-tall person rather than a (meaningless) 0-inch-tall person
earnings['c_height'] = earnings['height'] - 66
display(earnings.head())

# Drop rows with missing height or weight before fitting, and verify
earnings_clean = earnings.dropna(subset=['c_height', 'weight'])
print(earnings_clean[['c_height', 'weight']].info())
print(earnings_clean[['c_height', 'weight']].isna().sum())

# Classical OLS fit of weight on centered height
fit_and_plot_lm(earnings_clean, predictors=['c_height'], outcome='weight',
                add_constant=True, show_plot=True, scatter_kws=None, line_kws=None)

# Bayesian fit of the same model; trace_2 is used later for predictions
trace_2 = fit_and_plot_bayes(data=earnings_clean, predictor='c_height', outcome='weight',
                             intercept_mu=0, intercept_sigma=50,
                             slope_mu=0, slope_sigma=50,
                             sigma_sigma=50,
                             samples=2000, tune=1000, hdi_prob=0.95,
                             show_trace=True, show_forest=True,
                             show_posterior=True, show_regression=True,
                             n_regression_lines=100)
| height | weight | male | earn | earnk | ethnicity | education | mother_education | father_education | walk | exercise | smokenow | tense | angry | age | earn_k | c_height | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 74 | 210.0 | 1 | 50000.0 | 50.0 | White | 16.0 | 16.0 | 16.0 | 3 | 3 | 2.0 | 0.0 | 0.0 | 45 | 50.0 | 8 |
| 1 | 66 | 125.0 | 0 | 60000.0 | 60.0 | White | 16.0 | 16.0 | 16.0 | 6 | 5 | 1.0 | 0.0 | 0.0 | 58 | 60.0 | 0 |
| 2 | 64 | 126.0 | 0 | 30000.0 | 30.0 | White | 16.0 | 16.0 | 16.0 | 8 | 1 | 2.0 | 1.0 | 1.0 | 29 | 30.0 | -2 |
| 3 | 65 | 200.0 | 0 | 25000.0 | 25.0 | White | 17.0 | 17.0 | NaN | 8 | 1 | 2.0 | 0.0 | 0.0 | 57 | 25.0 | -1 |
| 4 | 63 | 110.0 | 0 | 50000.0 | 50.0 | Other | 16.0 | 16.0 | 16.0 | 5 | 6 | 2.0 | 0.0 | 0.0 | 91 | 50.0 | -3 |
<class 'pandas.DataFrame'>
Index: 1789 entries, 0 to 1815
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 c_height 1789 non-null int64
1 weight 1789 non-null float64
dtypes: float64(1), int64(1)
memory usage: 41.9 KB
None
c_height 0
weight 0
dtype: int64
OLS Regression Results
==============================================================================
Dep. Variable: weight R-squared: 0.301
Model: OLS Adj. R-squared: 0.300
Method: Least Squares F-statistic: 769.1
Date: Tue, 31 Mar 2026 Prob (F-statistic): 4.35e-141
Time: 07:55:48 Log-Likelihood: -8558.6
No. Observations: 1789 AIC: 1.712e+04
Df Residuals: 1787 BIC: 1.713e+04
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
c_height 4.9494 0.178 27.733 0.000 4.599 5.299
const 153.3726 0.693 221.435 0.000 152.014 154.731
==============================================================================
Omnibus: 343.790 Durbin-Watson: 1.912
Prob(Omnibus): 0.000 Jarque-Bera (JB): 707.829
Skew: 1.116 Prob(JB): 1.98e-154
Kurtosis: 5.124 Cond. No. 3.93
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Formula: weight = 153.37 + 4.95*c_height
Residual std dev (σ): 28.95 ± 0.48
MAD of residuals: 22.76
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 153.354 0.692 152.001 154.728 0.006 0.008 11467.0
slope 4.951 0.176 4.615 5.297 0.002 0.002 11689.0
sigma 28.968 0.479 28.061 29.934 0.005 0.005 10952.0
ess_tail r_hat
intercept 6285.0 1.0
slope 6413.0 1.0
sigma 6257.0 1.0
Inferences for the slope and error standard deviation are the same, but the intercept is different because of centering. The slope is the same because it is the same relationship between height and weight, but the intercept is different because it is now the expected weight for a person who is 66 inches tall, rather than a person who is 0 inches tall.
intercept 153.333 0.694 152.039 154.739 0.007 0.008 10740.0
slope 4.953 0.182 4.604 5.305 0.002 0.002 11080.0
sigma 28.971 0.484 28.017 29.894 0.005 0.005 10878.0
So from formula $ weight = 153.333 + 4.953 * (height - 66) + \epsilon$, we can see that for the average person who is 66 inches tall, the expected weight is 153.333 pounds with an uncertainty of 0.694 pounds. But for a specific person who is 66 inches tall, the expected weight is between 153.333 - 28.971 and 153.333 + 28.971 pounds, which is a much wider range because it incorporates the variability in the data.
When applying a fitted regression to a new data point $x_{new}$, we can make inferences about the linear predictor $\hat{y} = a + b x_{new}$, or about the predicted value $y_{new} = a + b x_{new} + \epsilon$. For example, let's predict the weight of a person who is 70 inches tall, so c_height = 70 - 66 = 4 inches.
Linear predictor = $a + 4 \times b$ Predicted value = $a + 4 \times b + \epsilon$
Sometimes we are interested in predicting the average weight and sometimes we are interested in predicting the weight of a specific person. They have different uncertainties because the former only incorporates uncertainty in the parameter estimates, while the latter also incorporates variability in the data.
For a linear model, the two types of predictions have the same mean, but different uncertainty. This is because as the sample size increases, the uncertainty in the parameter estimates decreases, but the variability in the data does not change. So with a large sample size, the uncertainty in the linear predictor will be small, but the uncertainty in the predictive distribution will still be large because of the variability in the data. With a small sample size, both uncertainties will be large.
# Extract posterior samples from the earnings model (fit_2 equivalent)
posterior_2 = trace_2.posterior
a = posterior_2["intercept"].values.flatten()  # Posterior samples of intercept
b = posterior_2["slope"].values.flatten()  # Posterior samples of slope
sigma_samples = posterior_2["sigma"].values.flatten()  # Posterior samples of residual std dev
# New data: predict weight for a person 70 inches tall (c_height = 70 - 66 = 4)
new_c_height = 4.0
# Point prediction: single best guess using posterior means
y_point_pred_2 = a.mean() + b.mean() * new_c_height
print(f"Point prediction at c_height={new_c_height}: {y_point_pred_2:.1f}")
# Linear predictor with uncertainty: posterior draws of a + 4.0*b.
# Captures uncertainty in the regression line but not individual variability.
y_linpred_2 = a + b * new_c_height
print(f"Linear predictor: median={np.median(y_linpred_2):.1f}, MAD={mad(y_linpred_2):.1f}")
# Posterior predictive distribution: a + 4.0*b + epsilon.
# np.random.normal(0, sigma_samples) broadcasts to sigma_samples.shape, i.e. one
# noise draw per posterior sigma draw, so both parameter uncertainty and
# individual-level variability are captured.
y_postpred_2 = a + b * new_c_height + np.random.normal(0, sigma_samples)
print(f"Posterior predictive: median={np.median(y_postpred_2):.1f}, MAD={mad(y_postpred_2):.1f}")
Point prediction at c_height=4.0: 173.2 Linear predictor: median=173.2, MAD=0.9 Posterior predictive: median=173.2, MAD=29.1
9.3 Prior information and Bayesian synthesis¶
Classical statistics produces summaries of a single dataset, but Bayesian methods combine the data with prior information to produce a posterior distribution that reflects both. This allows us to make better predictions by incorporating prior knowledge.
Expressing data and prior information on the same scale¶
Bayesian inference for the normal distribution, with $\theta$ being the continuous variable of interest. The goal is to combine a prior estimate $\hat{\theta}_{prior}$ with prior standard error $se_{prior}$ and a data estimate $\hat{\theta}_{data}$ with data standard error $se_{data}$. The resulting Bayesian estimate $\hat{\theta}_{Bayes}$ with standard error $se_{Bayes}$ is given by:
$$\hat{\theta}_{Bayes} = \frac{\hat{\theta}_{prior} / se_{prior}^2 + \hat{\theta}_{data} / se_{data}^2}{1 / se_{prior}^2 + 1 / se_{data}^2}$$
$$se_{Bayes} = \frac{1}{\sqrt{1 / se_{prior}^2 + 1 / se_{data}^2}}$$
This formula shows that the estimate is a compromise between the prior and the data. It is a weighted average of the prior and data estimates, where the weights are inversely proportional to their variances (squared standard errors). The more precise (lower standard error) estimate gets more influence on the final estimate. The total precision of the final posterior estimate is the sum of the precisions of the prior and data estimates — combining information always makes us more precise.
If both standard errors are the same, then the Bayesian estimate is just the average of the prior and data estimates - it is the mid point.
Variance is uncertainty and inverse variance (1/variance) is precision. Precision after combining = precision of prior + precision of data. So instead of averaging uncertainties we are adding certainties.
Bayesian information aggregation¶
Example:
Election coming up, fitted model using economic and political conditions gives forecast that Democratic candidate will receive 52.4% of two party vote with uncertainty of 4.1%. Notion above, $\hat{\theta}_{prior} = 0.524$ and $se_{prior} = 0.041$. Then we get a poll that shows 190 of 400 likely voters support the Democratic candidate, so $\hat{\theta}_{data} = 190/400 = 0.475$ and $se_{data} = \sqrt{0.475 \times (1 - 0.475) / 400} = 0.025$. Assumes nonresponse bias is negligible and poll is representative of the electorate.
We combine prior and data, assuming uncertainties are independent. Prior distribution from forecast based on previous elections and current conditions - forecast uncertainty coming from nonzero residuals in model. Data from survey of voters - uncertainty coming from sampling variability. Both sources of uncertainty are independent.
# Prior: forecast model estimate
theta_prior = 0.524  # prior mean (52.4% Democratic vote share)
se_prior = 0.041  # prior standard error (uncertainty of 4.1%)
# Data: poll estimate
theta_data = 190 / 400  # poll proportion (0.475)
se_data = np.sqrt(theta_data * (1 - theta_data) / 400)  # standard error of the poll proportion
# Bayes estimate: precision-weighted average of prior and data, assuming both
# are normally distributed and independent
theta_post = (theta_prior / se_prior**2 + theta_data / se_data**2) / (1 / se_prior**2 + 1 / se_data**2)
se_post = np.sqrt(1 / (1 / se_prior**2 + 1 / se_data**2))  # posterior standard error (precisions add)
print(f"Posterior mean (Bayes estimate): {theta_post:.3f}, with s.e. {se_post:.3f}")
# Grid of theta values to evaluate the densities
theta_grid = np.linspace(0.35, 0.65, 500)  # range of vote share values
# Compute normal densities for prior, data (likelihood), and posterior
from scipy.stats import norm
prior_density = norm.pdf(theta_grid, theta_prior, se_prior)
data_density = norm.pdf(theta_grid, theta_data, se_data)
bayes_density = norm.pdf(theta_grid, theta_post, se_post)
# Plot all three distributions
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(theta_grid, prior_density, label=f'Prior: N({theta_prior}, {se_prior})')
ax.plot(theta_grid, data_density, label=f'Data: N({theta_data:.3f}, {se_data:.3f})')
ax.plot(theta_grid, bayes_density, label=f'Posterior: N({theta_post:.3f}, {se_post:.3f})')
ax.set_xlabel('θ (Democratic vote share)')
ax.set_ylabel('Density')
# Title fixed: the posterior curve is plotted as well as prior and data
ax.set_title('Prior, data, and posterior distributions')
ax.axvline(0.5, color='black', linestyle='--', alpha=0.3, label='50% threshold')  # reference line
ax.legend()
plt.show()
Posterior mean (Bayes estimate): 0.488, with s.e. 0.021
Different ways of assigning prior distributions and performing Bayesian calculations¶
- Multiple ways to do same analysis. Comparing groups can be done with simple formula or regression - same answer. Similar with Bayesian inference - can do it with formulas or software.
- Bayesian inference works with any uncertain quantity.
- Prior changes depending on situation.
9.4 Example of Bayesian inference: beauty and sex ratio¶
Prior information can help us refine estimates from noisy data.
A survey of 3,000 Americans observed a correlation between the attractiveness of parents and the sex of their children. Adults were coded into five attractiveness categories; 56% of children of the most attractive parents were female, compared with 48% of children of other parents. The observed difference of 8 percentage points had a standard error of 3 percentage points, which is more than 2 standard errors away from zero, so it is statistically significant at the 5% level.
Prior information¶
It is well known that the human sex ratio varies within a small range. In the US, 48.7% of births are girls among whites and 49.2% among blacks. Similar differences of 0.5% or less are found when comparing by birth order, maternal age, or season. It therefore seems hard to believe that the difference between more and less attractive parents could be more than 0.5%.
Prior estimate and standard error¶
$\theta$ is our variable of interest, with prior mean 0% and SD 0.25%. A mean of 0% means we have no reason to believe that more attractive parents are more likely to have girls. An SD of 0.25% indicates we find it highly implausible that the true difference is higher than 0.5% or lower than -0.5%.
Data estimate and standard error¶
Survey gives $\hat{\theta_{data}} = 0.06$ with standard error $SE_{data} = 0.03$. The key takeaway: the prior (SD=0.25%) is much more precise than the data (SE=3%), so the posterior will be pulled almost entirely toward the prior mean of 0%. The noisy survey data barely moves the estimate — prior information dominates here.
Bayes estimate¶
$\hat{\theta_{bayes}} = 0.0004$ with standard error $SE_{bayes} = 0.0025$ (0.25%) - this indicates that the data has almost no influence on the estimate, and the posterior is essentially the same as the prior. The data is too noisy to provide any meaningful information.
Understanding the result¶
Sample of 3,000 seems good, but provides no information, why?
- Estimated proportion from simple random sample of 3,000 has standard error of $\sqrt{p(1-p)/n} = \sqrt{0.5 \times 0.5 / 3000} = 0.009$. This tells us that a survey of 3,000 people lets us estimate a proportion to within about 0.9 percentage points.
- Now consider comparison of two groups of 1,500 people each. The standard error of the difference in proportions is $\sqrt{p(1-p)/n_1 + p(1-p)/n_2} = \sqrt{0.5 \times 0.5 / 1500 + 0.5 \times 0.5 / 1500} = 0.018$. This tells us that a survey of 3,000 people split into two groups of 1,500 each lets us estimate the difference in proportions to within about 1.8 percentage points.
- The actual standard error of the difference in proportions is 3.3% because groups were uneven - only about 10% of respondents were 'very attractive', so the effective sample size for that group is only about 300 people, which gives a standard error of $\sqrt{p(1-p)/n} = \sqrt{0.5 \times 0.5 / 300} = 0.029$.
- Estimating a comparison to an accuracy of 2-3% is normally considered good, but in this case it is not good enough to detect a difference of 0.5% or less, which is what we would expect based on prior information. So the data is too noisy to provide any meaningful information about the relationship between attractiveness and sex ratio.
import pyreadr
from scipy.stats import norm

# Read the sex-ratio data set exported from R (.rda file)
result = pyreadr.read_r('/Users/cairanvanrooyen/Desktop/personal/website/alfolio-project/assets/ros_data/sexratio.rda')
print("Keys in the R data file:", result.keys())  # inspect which data frames the file contains
df = next(iter(result.values()))  # take the first (and only) data frame
# Give the columns convenient names
df.columns = ['attractiveness', 'percentage_female']
display(df.head())
# Classical least-squares fit for comparison with the Bayesian analysis below
fit_and_plot_lm(data=df, predictors=['attractiveness'], outcome='percentage_female', add_constant=True, show_plot=True, scatter_kws=None, line_kws=None)
# --- Bayesian conjugate normal update ---
# Prior: effect assumed essentially nil; values beyond ±0.5% are implausible
theta_prior = 0.0
se_prior = 0.0025  # prior SD of 0.25%, so ±2 SDs ≈ ±0.5%
# Data: survey estimate of the attractiveness difference
theta_data = 0.06  # observed 6-point difference
se_data = 0.03  # 3-point standard error
# Precision-weighted combination (normal-normal conjugacy):
# precisions add, posterior mean is the precision-weighted average.
prior_precision = 1 / se_prior**2
data_precision = 1 / se_data**2
theta_post = (theta_prior * prior_precision + theta_data * data_precision) / (prior_precision + data_precision)
se_post = np.sqrt(1 / (prior_precision + data_precision))
print(f"Posterior mean: {theta_post:.4f} ({theta_post*100:.2f}%), SE: {se_post:.4f} ({se_post*100:.2f}%)")
# Plot prior, data likelihood, and posterior on a common grid
theta_grid = np.linspace(-0.02, 0.10, 500)
fig, ax = plt.subplots(figsize=(8, 4))
# (mean, sd, legend label) for each of the three curves
curves = [
    (theta_prior, se_prior, f'Prior: N({theta_prior}, {se_prior})'),
    (theta_data, se_data, f'Data: N({theta_data}, {se_data})'),
    (theta_post, se_post, f'Posterior: N({theta_post:.4f}, {se_post:.4f})'),
]
for center, spread, label in curves:
    ax.plot(theta_grid, norm.pdf(theta_grid, center, spread), label=label)
ax.axvline(0, color='black', linestyle='--', alpha=0.3)  # zero-effect reference
ax.set_xlabel('θ (difference in proportion female)')
ax.set_ylabel('Density')
ax.set_title('Beauty and sex ratio: Bayesian update')
ax.legend()
plt.show()
Keys in the R data file: odict_keys(['sexratio'])
| attractiveness | percentage_female | |
|---|---|---|
| 0 | -2.0 | 50.0 |
| 1 | -1.0 | 44.0 |
| 2 | 0.0 | 50.0 |
| 3 | 1.0 | 47.0 |
| 4 | 2.0 | 56.0 |
OLS Regression Results
==============================================================================
Dep. Variable: percentage_female R-squared: 0.284
Model: OLS Adj. R-squared: 0.045
Method: Least Squares F-statistic: 1.190
Date: Tue, 31 Mar 2026 Prob (F-statistic): 0.355
Time: 07:55:50 Log-Likelihood: -13.166
No. Observations: 5 AIC: 30.33
Df Residuals: 3 BIC: 29.55
Df Model: 1
Covariance Type: nonrobust
==================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------
attractiveness 1.5000 1.375 1.091 0.355 -2.875 5.875
const 49.4000 1.944 25.409 0.000 43.213 55.587
==============================================================================
Omnibus: nan Durbin-Watson: 2.698
Prob(Omnibus): nan Jarque-Bera (JB): 0.658
Skew: -0.132 Prob(JB): 0.720
Kurtosis: 1.242 Cond. No. 1.41
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Formula: percentage_female = 49.40 + 1.50*attractiveness
Residual std dev (σ): 4.35 ± 1.77
MAD of residuals: 4.45
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given.
warn("omni_normtest is not valid with less than 8 observations; %i "
Posterior mean: 0.0004 (0.04%), SE: 0.0025 (0.25%)
9.5 Uniform, weakly informative, and informative priors in regression¶
When data are less informative, prior is more important.
In Bayesian inference the likelihood is multiplied by the prior to get the posterior.
This section looks at uniform, weakly informative, and informative priors in regression.
Uniform priors¶
Flat, uniform are uninformative priors.
# Bayesian linear regression with PyMC using flat (uninformative) priors
x = hibbs['growth'].values
y = hibbs['vote'].values

with pm.Model() as flat_model:
    # Improper flat priors: the posterior is driven entirely by the likelihood
    intercept = pm.Flat("intercept")
    slope = pm.Flat("slope")
    sigma = pm.HalfFlat("sigma")  # flat over the positive reals for the noise scale
    mu = intercept + slope * x  # linear predictor
    y_obs = pm.Normal("y", mu=mu, sigma=sigma, observed=y)  # Gaussian likelihood
    idata = pm.sample(2000, tune=1000)  # NUTS: 2000 draws per chain after 1000 tuning steps

# Posterior summary with 95% highest-density intervals
print(pm.summary(idata, hdi_prob=0.95))

# Trace plot: sampling paths and marginal densities per parameter
az.plot_trace(idata)
plt.tight_layout()
plt.show()

# Forest plot (posterior means + HDIs) and marginal posterior densities
az.plot_forest(idata, hdi_prob=0.95)
az.plot_posterior(idata, hdi_prob=0.95)
plt.show()

# Joint posterior of (intercept, slope): scatter, KDE heatmap, and 3-D view
draws = idata.posterior
a_samples = draws["intercept"].values.flatten()
b_samples = draws["slope"].values.flatten()
sns.scatterplot(x=a_samples, y=b_samples, size=0.5, alpha=0.1, color='blue')
sns.kdeplot(x=a_samples, y=b_samples, fill=True, cmap='Reds', alpha=0.5)
from scipy.stats import gaussian_kde
stacked = np.vstack([a_samples, b_samples])
density = gaussian_kde(stacked)(stacked)  # KDE evaluated at each draw
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(a_samples, b_samples, density, c=density, cmap='Reds', alpha=0.5)
ax.set_xlabel('Intercept')
ax.set_ylabel('Slope')
ax.set_zlabel('Density')
ax.set_title('Joint posterior distribution of intercept and slope')
plt.show()
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 46.212 1.807 42.608 49.718 0.033 0.029 3084.0
slope 3.069 0.773 1.607 4.641 0.014 0.012 3026.0
sigma 4.154 0.903 2.647 5.929 0.015 0.017 3596.0
ess_tail r_hat
intercept 2865.0 1.0
slope 3024.0 1.0
sigma 3420.0 1.0
Default prior distribution¶
Next step beyond a flat is a weakly informative prior.
Common conventional prior choices in PyMC:
| Parameter type | Typical prior | Notes |
|---|---|---|
| Unbounded real (intercept, slope) | pm.Normal(0, σ) |
σ scaled to data |
| Positive (std dev, variance) | pm.HalfNormal(σ) or pm.HalfCauchy(β) |
HalfCauchy has heavier tails |
| Positive (precision, rate) | pm.Gamma(α, β) or pm.Exponential(λ) |
|
| Bounded [0,1] (probabilities) | pm.Beta(α, β) |
Beta(1,1) = uniform on [0,1] |
| Categorical weights | pm.Dirichlet(a) |
a = [1,1,...,1] = uniform |
| Correlation matrices | pm.LKJCorr(η) |
η=1 = uniform over correlations |
# Data-scale summaries used to construct rstanarm-style default priors:
# sd of the predictor, and sd and mean of the outcome.
hibbs_growth_sd = hibbs['growth'].std()
hibbs_vote_sd = hibbs['vote'].std()
hibbs_vote_mean = hibbs['vote'].mean()
print(f"Standard deviation of growth: {hibbs_growth_sd:.2f}")
print(f"Standard deviation of vote: {hibbs_vote_sd:.2f}")
print(f"Mean vote: {hibbs_vote_mean:.2f}")
# Bayesian linear regression with weakly informative, data-scaled priors
# (the rstanarm-style defaults computed above)
with pm.Model() as default_model:
    # Intercept centred on the mean vote share, generously scaled
    intercept = pm.Normal("intercept", mu=hibbs_vote_mean, sigma=2.5*hibbs_vote_sd)
    # Slope centred at zero, scaled by the ratio of outcome to predictor spread
    slope = pm.Normal("slope", mu=0, sigma=2.5*(hibbs_vote_sd/hibbs_growth_sd))
    # Exponential prior on the noise scale with mean equal to sd(vote)
    sigma = pm.Exponential("sigma", lam=1/hibbs_vote_sd)
    mu = intercept + slope * x  # linear predictor
    y_obs = pm.Normal("y", mu=mu, sigma=sigma, observed=y)  # Gaussian likelihood
    idata = pm.sample(2000, tune=1000)  # 2000 draws per chain after 1000 tuning steps

# Posterior summary with 95% highest-density intervals
print(pm.summary(idata, hdi_prob=0.95))

# Trace plot: sampling paths and marginal densities per parameter
az.plot_trace(idata)
plt.tight_layout()
plt.show()

# Forest plot (posterior means + HDIs) and marginal posterior densities
az.plot_forest(idata, hdi_prob=0.95)
az.plot_posterior(idata, hdi_prob=0.95)
plt.show()

# Joint posterior scatter of intercept vs slope
draws = idata.posterior
a_samples = draws["intercept"].values.flatten()
b_samples = draws["slope"].values.flatten()
sns.scatterplot(x=a_samples, y=b_samples, size=0.5, alpha=0.1, color='blue')
Initializing NUTS using jitter+adapt_diag...
Standard deviation of growth: 1.40 Standard deviation of vote: 5.61 Mean vote: 52.05
Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 46.374 1.718 42.879 49.682 0.028 0.023 3711.0
slope 3.010 0.735 1.510 4.428 0.012 0.010 3671.0
sigma 4.013 0.819 2.667 5.676 0.013 0.015 3944.0
ess_tail r_hat
intercept 4250.0 1.0
slope 4135.0 1.0
sigma 4127.0 1.0
<Axes: >
Weakly informative prior distribution based on subject-matter knowledge¶
We should include informative priors when we have strong prior information.
In the economics vs politics example:
The intercept is the expected vote share for the Democratic candidate when economic growth is zero. We know that the Democratic candidate will get some votes even with zero growth, we might assume this is close to 50%, so we can set a weakly informative prior with a mean of 50% and a standard deviation of 10%. This allows for some uncertainty but still reflects our belief that the Democratic candidate will get around 50% of the vote with zero growth.
We expect the coefficient for economic growth to be positive, but we don't know this for certain. Economic growth historically has been 0-4%. Could a difference of 1% in growth lead to a difference in vote share of 10 percentage points? Maybe, but it seems unlikely. We could set a weakly informative prior with a mean of 5 and a standard deviation of 5, which implies that we think a difference of 1% in growth is likely to lead to a difference in vote share of around 5 percentage points, but it could be as low as 0 or as high as 10 percentage points. This allows for a wide range of possible values but still reflects our belief that the effect is likely to be small.
# Weakly informative priors from subject-matter knowledge:
# intercept ~ N(50, 10) (vote share near 50% at zero growth),
# slope ~ N(5, 5) (effect of 1% growth likely ~5 points, plausibly 0-10).
subject_matter_priors = dict(
    intercept_mu=50, intercept_sigma=10,
    slope_mu=5, slope_sigma=5,
    sigma_sigma=50,
)
fit_and_plot_bayes(data=hibbs, predictor='growth', outcome='vote',
                   samples=2000, tune=1000, hdi_prob=0.95,
                   show_trace=True, show_forest=True,
                   show_posterior=True, show_regression=True,
                   n_regression_lines=100, **subject_matter_priors)
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 46.275 1.748 42.747 49.683 0.031 0.026 3353.0
slope 3.068 0.753 1.626 4.622 0.013 0.011 3402.0
sigma 4.126 0.878 2.648 5.849 0.015 0.016 3804.0
ess_tail r_hat
intercept 3294.0 1.0
slope 3434.0 1.0
sigma 4203.0 1.0
-
<xarray.Dataset> Size: 208kB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 5 6 ... 1994 1995 1996 1997 1998 1999 Data variables: intercept (chain, draw) float64 64kB 46.08 44.7 44.01 ... 45.08 44.91 47.64 slope (chain, draw) float64 64kB 2.782 3.444 4.179 ... 3.383 2.735 sigma (chain, draw) float64 64kB 4.39 3.445 4.124 ... 3.604 3.508 4.057 Attributes: created_at: 2026-03-31T07:31:17.009084+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.7429487705230713 tuning_steps: 1000 -
<xarray.Dataset> Size: 1MB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 ... 1996 1997 1998 1999 Data variables: (12/18) lp (chain, draw) float64 64kB -52.37 -52.32 ... -52.14 perf_counter_start (chain, draw) float64 64kB 9.238e+04 ... 9.238e+04 step_size_bar (chain, draw) float64 64kB 0.5232 0.5232 ... 0.5192 diverging (chain, draw) bool 8kB False False ... False False smallest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan acceptance_rate (chain, draw) float64 64kB 0.8993 0.8425 ... 0.9943 ... ... largest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan divergences (chain, draw) int64 64kB 0 0 0 0 0 0 ... 0 0 0 0 0 0 energy (chain, draw) float64 64kB 53.82 54.64 ... 52.33 max_energy_error (chain, draw) float64 64kB 0.5248 0.4235 ... -0.1207 n_steps (chain, draw) float64 64kB 7.0 5.0 7.0 ... 3.0 7.0 step_size (chain, draw) float64 64kB 0.5359 0.5359 ... 0.6447 Attributes: created_at: 2026-03-31T07:31:17.024314+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.7429487705230713 tuning_steps: 1000 -
<xarray.Dataset> Size: 256B Dimensions: (y_dim_0: 16) Coordinates: * y_dim_0 (y_dim_0) int64 128B 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 Data variables: y (y_dim_0) float64 128B 44.6 57.76 49.91 61.34 ... 51.24 46.32 52.0 Attributes: created_at: 2026-03-31T07:31:17.027368+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2
Example where an informative prior makes a difference: Beauty and sex ratio¶
In the election forecasting model, an informative prior doesn't change much. With noisy data, however, priors can be important for keeping estimates reasonable.
# Reload the sex-ratio data, then fit classical OLS and a Bayesian model
# with data-scaled (rstanarm-default) priors for comparison.
result = pyreadr.read_r('/Users/cairanvanrooyen/Desktop/personal/website/alfolio-project/assets/ros_data/sexratio.rda')
print("Keys in the R data file:", result.keys())  # inspect which data frames are available
df = next(iter(result.values()))  # take the first (and only) data frame
# Give the columns convenient names
df.columns = ['attractiveness', 'percentage_female']
display(df.head())
# Classical least-squares fit for reference
fit_and_plot_lm(data=df, predictors=['attractiveness'], outcome='percentage_female', add_constant=True, show_plot=True, scatter_kws=None, line_kws=None)
# Data scales used for the rstanarm-style default priors
sd_x = df['attractiveness'].std()
sd_y = df['percentage_female'].std()
mean_y = df['percentage_female'].mean()
fit_and_plot_bayes(data=df, predictor='attractiveness', outcome='percentage_female',
                   intercept_mu=mean_y, intercept_sigma=2.5 * sd_y,
                   slope_mu=0, slope_sigma=2.5 * sd_y / sd_x,
                   sigma_sigma=sd_y,
                   samples=2000, tune=1000, hdi_prob=0.95,
                   show_trace=True, show_forest=True,
                   show_posterior=True, show_regression=True,
                   n_regression_lines=100)
Keys in the R data file: odict_keys(['sexratio'])
| attractiveness | percentage_female | |
|---|---|---|
| 0 | -2.0 | 50.0 |
| 1 | -1.0 | 44.0 |
| 2 | 0.0 | 50.0 |
| 3 | 1.0 | 47.0 |
| 4 | 2.0 | 56.0 |
OLS Regression Results
==============================================================================
Dep. Variable: percentage_female R-squared: 0.284
Model: OLS Adj. R-squared: 0.045
Method: Least Squares F-statistic: 1.190
Date: Tue, 31 Mar 2026 Prob (F-statistic): 0.355
Time: 09:01:03 Log-Likelihood: -13.166
No. Observations: 5 AIC: 30.33
Df Residuals: 3 BIC: 29.55
Df Model: 1
Covariance Type: nonrobust
==================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------
attractiveness 1.5000 1.375 1.091 0.355 -2.875 5.875
const 49.4000 1.944 25.409 0.000 43.213 55.587
==============================================================================
Omnibus: nan Durbin-Watson: 2.698
Prob(Omnibus): nan Jarque-Bera (JB): 0.658
Skew: -0.132 Prob(JB): 0.720
Kurtosis: 1.242 Cond. No. 1.41
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Formula: percentage_female = 49.40 + 1.50*attractiveness
Residual std dev (σ): 4.35 ± 1.77
MAD of residuals: 4.45
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/statsmodels/stats/stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 5 samples were given.
warn("omni_normtest is not valid with less than 8 observations; %i "
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds. There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 49.365 2.165 44.768 53.504 0.030 0.034 5536.0
slope 1.435 1.560 -1.670 4.630 0.022 0.025 4951.0
sigma 4.820 1.660 2.244 8.182 0.027 0.023 3598.0
ess_tail r_hat
intercept 4486.0 1.0
slope 3993.0 1.0
sigma 3952.0 1.0
-
<xarray.Dataset> Size: 208kB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 5 6 ... 1994 1995 1996 1997 1998 1999 Data variables: intercept (chain, draw) float64 64kB 47.97 51.51 51.8 ... 47.69 49.62 47.21 slope (chain, draw) float64 64kB 1.076 1.565 1.742 ... -0.1376 3.067 sigma (chain, draw) float64 64kB 5.653 2.603 3.669 ... 4.913 4.039 Attributes: created_at: 2026-03-31T08:01:08.701918+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.6219639778137207 tuning_steps: 1000 -
<xarray.Dataset> Size: 1MB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 ... 1996 1997 1998 1999 Data variables: (12/18) lp (chain, draw) float64 64kB -21.34 -22.38 ... -21.85 perf_counter_start (chain, draw) float64 64kB 9.418e+04 ... 9.418e+04 step_size_bar (chain, draw) float64 64kB 0.7545 0.7545 ... 0.6779 diverging (chain, draw) bool 8kB False False ... False False smallest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan acceptance_rate (chain, draw) float64 64kB 0.7784 0.8737 ... 0.8034 ... ... largest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan divergences (chain, draw) int64 64kB 0 0 0 0 0 0 ... 0 0 0 0 0 0 energy (chain, draw) float64 64kB 25.34 22.89 ... 23.61 max_energy_error (chain, draw) float64 64kB 1.093 1.095 ... 0.8911 n_steps (chain, draw) float64 64kB 3.0 7.0 3.0 ... 7.0 3.0 step_size (chain, draw) float64 64kB 0.5326 0.5326 ... 0.6067 Attributes: created_at: 2026-03-31T08:01:08.714905+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.6219639778137207 tuning_steps: 1000 -
<xarray.Dataset> Size: 80B Dimensions: (y_dim_0: 5) Coordinates: * y_dim_0 (y_dim_0) int64 40B 0 1 2 3 4 Data variables: y (y_dim_0) float64 40B 50.0 44.0 50.0 47.0 56.0 Attributes: created_at: 2026-03-31T08:01:08.718221+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2
# Informative priors from the sex-ratio literature: the percentage of girls
# is expected to be around 48.8% with SD 0.5%, and any attractiveness effect
# (slope) near 0 with SD 0.2 — i.e. plausibly within about ±0.4 points
# (2 SDs), a tight range consistent with the known small variation in the
# human sex ratio.
fit_and_plot_bayes(data=df, predictor='attractiveness', outcome='percentage_female',
                   intercept_mu=48.8, intercept_sigma=0.5,
                   slope_mu=0, slope_sigma=0.2,
                   sigma_sigma=sd_y,
                   samples=2000, tune=1000, hdi_prob=0.95,
                   show_trace=True, show_forest=True,
                   show_posterior=True, show_regression=True,
                   n_regression_lines=100)
Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, slope, sigma]
/opt/anaconda3/envs/ros_pymc/lib/python3.12/site-packages/rich/live.py:260: UserWarning: install "ipywidgets" for
Jupyter support
warnings.warn('install "ipywidgets" for Jupyter support')
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 1 seconds. There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
mean sd hdi_2.5% hdi_97.5% mcse_mean mcse_sd ess_bulk \
intercept 48.842 0.487 47.933 49.849 0.005 0.005 10927.0
slope 0.034 0.199 -0.364 0.419 0.002 0.002 11023.0
sigma 4.511 1.400 2.283 7.316 0.015 0.020 10800.0
ess_tail r_hat
intercept 5891.0 1.0
slope 5935.0 1.0
sigma 6029.0 1.0
-
<xarray.Dataset> Size: 208kB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 5 6 ... 1994 1995 1996 1997 1998 1999 Data variables: intercept (chain, draw) float64 64kB 48.68 49.31 49.66 ... 47.84 48.11 slope (chain, draw) float64 64kB -0.1048 0.06646 ... -0.226 -0.1092 sigma (chain, draw) float64 64kB 6.337 3.746 2.613 ... 3.621 3.526 Attributes: created_at: 2026-03-31T08:13:40.384570+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.5975902080535889 tuning_steps: 1000 -
<xarray.Dataset> Size: 1MB Dimensions: (chain: 4, draw: 2000) Coordinates: * chain (chain) int64 32B 0 1 2 3 * draw (draw) int64 16kB 0 1 2 3 4 ... 1996 1997 1998 1999 Data variables: (12/18) lp (chain, draw) float64 64kB -15.47 -14.82 ... -15.96 perf_counter_start (chain, draw) float64 64kB 9.493e+04 ... 9.493e+04 step_size_bar (chain, draw) float64 64kB 1.088 1.088 ... 0.9643 diverging (chain, draw) bool 8kB False False ... False False smallest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan acceptance_rate (chain, draw) float64 64kB 0.9975 0.9905 ... 1.0 ... ... largest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan divergences (chain, draw) int64 64kB 0 0 0 0 0 0 ... 0 0 0 0 0 0 energy (chain, draw) float64 64kB 16.63 15.74 ... 20.14 17.4 max_energy_error (chain, draw) float64 64kB -0.2683 -0.145 ... -0.3297 n_steps (chain, draw) float64 64kB 3.0 7.0 3.0 ... 3.0 1.0 step_size (chain, draw) float64 64kB 1.117 1.117 ... 1.077 Attributes: created_at: 2026-03-31T08:13:40.396874+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2 sampling_time: 0.5975902080535889 tuning_steps: 1000 -
<xarray.Dataset> Size: 80B Dimensions: (y_dim_0: 5) Coordinates: * y_dim_0 (y_dim_0) int64 40B 0 1 2 3 4 Data variables: y (y_dim_0) float64 40B 50.0 44.0 50.0 47.0 56.0 Attributes: created_at: 2026-03-31T08:13:40.400973+00:00 arviz_version: 0.23.4 inference_library: pymc inference_library_version: 5.28.2