From 8f8b87fcabd7fa8920f11202b6704886265f67c3 Mon Sep 17 00:00:00 2001
From: speco29
Date: Thu, 23 Jan 2025 22:45:25 +0530
Subject: [PATCH 1/3] fixes #437 Adding piecewise regression examples in example docs

---
 README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/README.md b/README.md
index 8b67ed48..5ff0694f 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,63 @@ fitted = model.fit()
 
 After this, we can evaluate the model as before.
 
+
+### Piecewise Regression
+
+We'll use synthetic data to demonstrate piecewise regression.
+
+```python
+import numpy as np
+import pandas as pd
+import statsmodels.formula.api as smf
+
+# Define the truncate (hinge) function first: it is used both to simulate
+# the data and inside the model formula below
+def truncate(x, l):
+    return (x - l) * (x >= l)
+
+# Generate synthetic data with slope changes at x = 10 and x = 20
+np.random.seed(0)
+x = np.linspace(0, 30, 100)
+y = 3 + 0.5 * x + 1.5 * truncate(x, 10) + 2.0 * truncate(x, 20) + np.random.normal(0, 1, 100)
+
+# Create a DataFrame
+df = pd.DataFrame({'x': x, 'response': y})
+
+# Fit the piecewise regression model
+formula = "response ~ x + truncate(x, 10) + truncate(x, 20)"
+model = smf.ols(formula, data=df).fit()
+
+# Display the model summary
+print(model.summary())
+```
+
+The output will include the summary of the fitted regression model, showing coefficients for each term:
+
+```
+                            OLS Regression Results
+==============================================================================
+Dep. Variable:               response   R-squared:                       0.963
+Model:                            OLS   Adj. R-squared:                  0.962
+Method:                 Least Squares   F-statistic:                     838.8
+Date:                Thu, 23 Jan 2025   Prob (F-statistic):           7.62e-71
+Time:                        22:42:00   Log-Likelihood:                -136.22
+No. Observations:                 100   AIC:                             280.4
+Df Residuals:                      96   BIC:                             290.9
+Df Model:                           3
+Covariance Type:            nonrobust
+==============================================================================
+                     coef    std err          t      P>|t|      [0.025      0.975]
+----------------------------------------------------------------------------------
+Intercept          3.1241      0.150     20.842      0.000       2.826       3.422
+x                  0.4921      0.014     34.274      0.000       0.465       0.519
+truncate(x, 10)    1.4969      0.024     61.910      0.000       1.449       1.544
+truncate(x, 20)    1.9999      0.018    110.199      0.000       1.964       2.036
+==============================================================================
+Omnibus:                        0.385   Durbin-Watson:                   2.227
+Prob(Omnibus):                  0.825   Jarque-Bera (JB):                0.509
+Skew:                          -0.046   Prob(JB):                        0.775
+Kurtosis:                       2.669   Cond. No.                         47.4
+==============================================================================
+```
+
+This summary shows the coefficients for the intercept, x, truncate(x, 10), and truncate(x, 20) terms, along with their statistical significance.
+
 ### More
 
 For a more in-depth introduction to Bambi see our
 [Quickstart](https://github.com/bambinos/bambi#quickstart) and check the notebooks in the
 [Examples](https://bambinos.github.io/bambi/notebooks/) webpage.

From 0a319060489bb8c1c83a436901b2561c47f2ef30 Mon Sep 17 00:00:00 2001
From: speco29
Date: Fri, 24 Jan 2025 23:55:58 +0530
Subject: [PATCH 2/3] Changed the piecewise regression example using bambi

---
 README.md | 139 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 87 insertions(+), 52 deletions(-)

diff --git a/README.md b/README.md
index 5ff0694f..3f55b07c 100644
--- a/README.md
+++ b/README.md
@@ -125,60 +125,95 @@ After this, we can evaluate the model as before.
 
 ### Piecewise Regression
 
+Let's walk through an example of performing a piecewise regression using Bambi.
+
+```python
+import arviz as az
+import bambi as bmb
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+# Create example data with a break at x = 5
+np.random.seed(42)
+x = np.linspace(0, 10, 100)
+y = np.where(x < 5, 2*x + np.random.normal(0, 1, 100), -3*x + 20 + np.random.normal(0, 1, 100))
+data = pd.DataFrame({'x': x, 'y': y})
+
+# Plot the data
+plt.scatter(data['x'], data['y'])
+plt.xlabel('x')
+plt.ylabel('y')
+plt.show()
+
+# Define the model: a change in intercept and slope at x = 5.
+# I() wraps the arithmetic so it is evaluated as Python rather than as formula syntax.
+model = bmb.Model('y ~ 0 + x + I(x > 5) + I((x - 5) * (x > 5))', data)
+
+# Fit the model
+results = model.fit(draws=2000, cores=2)
+
+# Summarize the posterior
+print(az.summary(results))
+
+# Predict and plot the fitted values. predict() adds the predictions to
+# `results` in place; kind="response" samples from the posterior predictive
+# (older Bambi releases called this kind="pps").
+x_pred = np.linspace(0, 10, 100)
+data_pred = pd.DataFrame({'x': x_pred})
+model.predict(results, data=data_pred, kind="response")
+y_pred = results.posterior_predictive['y'].mean(('chain', 'draw'))
+
+plt.scatter(data['x'], data['y'], label='Data')
+plt.plot(x_pred, y_pred, color='red', label='Fitted Piecewise Regression')
+plt.xlabel('x')
+plt.ylabel('y')
+plt.legend()
+plt.show()
+```
+
+This should give you a piecewise regression model fitted to your data using Bambi. The plot will show the original data points and the fitted piecewise regression line.
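+
+If the inline expressions in the formula give trouble with a given formulae version, an equivalent sketch precomputes the basis columns so the formula only references plain column names. The names `step` and `x_after` are illustrative, not part of any API:
+
+```python
+# Indicator and hinge basis for a knot at x = 5 (column names are arbitrary)
+data['step'] = (data['x'] > 5).astype(int)
+data['x_after'] = np.maximum(data['x'] - 5, 0)
+
+alt_model = bmb.Model('y ~ 0 + x + step + x_after', data)
+alt_results = alt_model.fit(draws=2000, cores=2)
+```
+
+Both parameterizations describe the same piecewise-linear mean; precomputing the columns simply sidesteps any ambiguity in how the formula parser treats the comparison and product terms.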
+
+### Potential
+
+We'll demonstrate the concept of a potential in a probabilistic model using a likelihood function. In this case, we'll use a Gaussian (normal) distribution to represent the likelihood and add a potential function to constrain the model.
+
+```python
+import matplotlib.pyplot as plt
+import numpy as np
+
+def likelihood(x, mu, sigma):
+    """
+    Gaussian likelihood function
+    """
+    return (1 / (np.sqrt(2 * np.pi) * sigma)) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
+
+def potential(x):
+    """
+    Potential function to constrain the model
+    """
+    # Example potential: quadratic potential centered at x = 2
+    return np.exp(-0.5 * (x - 2) ** 2)
+
+def posterior(x, mu, sigma):
+    """
+    Unnormalized posterior combining likelihood and potential
+    """
+    return likelihood(x, mu, sigma) * potential(x)
+
+# Define parameters
+mu = 0
+sigma = 1
+x_values = np.linspace(-5, 5, 100)
+
+# Calculate likelihood, potential, and posterior
+likelihood_values = likelihood(x_values, mu, sigma)
+potential_values = potential(x_values)
+posterior_values = posterior(x_values, mu, sigma)
+
+# Plot the functions
+plt.figure(figsize=(10, 6))
+plt.plot(x_values, likelihood_values, label='Likelihood', linestyle='--')
+plt.plot(x_values, potential_values, label='Potential', linestyle=':')
+plt.plot(x_values, posterior_values, label='Posterior')
+plt.xlabel('x')
+plt.ylabel('Probability Density')
+plt.title('Likelihood, Potential, and Posterior')
+plt.legend()
+plt.show()
+```
+
+This example visually demonstrates how adding a potential function can constrain the model and influence the resulting distribution.
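+
+The plot above uses plain numpy, but the same idea is available when actually fitting models: PyMC, Bambi's backend, exposes `pm.Potential`, which adds an arbitrary term to the model's log-probability. A minimal PyMC sketch of the same quadratic constraint (this uses PyMC directly; it is not part of Bambi's formula interface):
+
+```python
+import pymc as pm
+
+with pm.Model():
+    x = pm.Normal("x", mu=0, sigma=1)  # matches the Gaussian likelihood above
+    # pm.Potential expects a log-probability term, so we pass the *log* of
+    # the quadratic potential used in the numpy example
+    pm.Potential("constraint", -0.5 * (x - 2) ** 2)
+    idata = pm.sample(1000, chains=2)
+```
+
+Sampling then targets the product of the Gaussian density and the exponentiated potential, which is exactly the unnormalized posterior plotted above.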
-
-We'll use synthetic data to demonstrate piecewise regression.
-
-```python
-import numpy as np
-import pandas as pd
-import statsmodels.formula.api as smf
-
-# Define the truncate (hinge) function first: it is used both to simulate
-# the data and inside the model formula below
-def truncate(x, l):
-    return (x - l) * (x >= l)
-
-# Generate synthetic data with slope changes at x = 10 and x = 20
-np.random.seed(0)
-x = np.linspace(0, 30, 100)
-y = 3 + 0.5 * x + 1.5 * truncate(x, 10) + 2.0 * truncate(x, 20) + np.random.normal(0, 1, 100)
-
-# Create a DataFrame
-df = pd.DataFrame({'x': x, 'response': y})
-
-# Fit the piecewise regression model
-formula = "response ~ x + truncate(x, 10) + truncate(x, 20)"
-model = smf.ols(formula, data=df).fit()
-
-# Display the model summary
-print(model.summary())
-```
-
-The output will include the summary of the fitted regression model, showing coefficients for each term:
-
-```
-                            OLS Regression Results
-==============================================================================
-Dep. Variable:               response   R-squared:                       0.963
-Model:                            OLS   Adj. R-squared:                  0.962
-Method:                 Least Squares   F-statistic:                     838.8
-Date:                Thu, 23 Jan 2025   Prob (F-statistic):           7.62e-71
-Time:                        22:42:00   Log-Likelihood:                -136.22
-No. Observations:                 100   AIC:                             280.4
-Df Residuals:                      96   BIC:                             290.9
-Df Model:                           3
-Covariance Type:            nonrobust
-==============================================================================
-                     coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
-Intercept          3.1241      0.150     20.842      0.000       2.826       3.422
-x                  0.4921      0.014     34.274      0.000       0.465       0.519
-truncate(x, 10)    1.4969      0.024     61.910      0.000       1.449       1.544
-truncate(x, 20)    1.9999      0.018    110.199      0.000       1.964       2.036
-==============================================================================
-Omnibus:                        0.385   Durbin-Watson:                   2.227
-Prob(Omnibus):                  0.825   Jarque-Bera (JB):                0.509
-Skew:                          -0.046   Prob(JB):                        0.775
-Kurtosis:                       2.669   Cond. No.                         47.4
-==============================================================================
-```
-
-This summary shows the coefficients for the intercept, x, truncate(x, 10), and truncate(x, 20) terms, along with their statistical significance.
 
 ### More
 
 For a more in-depth introduction to Bambi see our
 [Quickstart](https://github.com/bambinos/bambi#quickstart) and check the notebooks in the
 [Examples](https://bambinos.github.io/bambi/notebooks/) webpage.

From abe5384cf989716f4edbbc62ac247f7b6518c378 Mon Sep 17 00:00:00 2001
From: speco29
Date: Sun, 26 Jan 2025 12:22:54 +0530
Subject: [PATCH 3/3] fixes #644 Added more questions for FAQ.

---
 docs/faq.qmd | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/docs/faq.qmd b/docs/faq.qmd
index 9f427a95..6b656491 100644
--- a/docs/faq.qmd
+++ b/docs/faq.qmd
@@ -14,7 +14,17 @@ inference.
 
 * PyMC is a library for Bayesian modelling, and is the backend used by Bambi. It is a
 very powerful library, but can be challenging to use for beginners. Bambi provides a
 simple interface for specifying models, and allows for easy inference via
 MCMC or variational inference using PyMC.
+
+### Why have a Bayesian regression library?
+
+Bayesian modelling allows flexible (read 'bespoke') model specification and also provides an
+estimate of the uncertainty in the model parameters. Both of these are widely useful in
+practice, in particular in a business context where the model is used to make decisions,
+and where a complex model may be needed to capture the underlying relationships. Further,
+Bayesian modelling allows graceful handling of small sample sizes by judicious use of
+prior distributions.
 
 ## Inference Questions
 
@@ -33,6 +43,18 @@
 
 Yes, Bambi supports inference on GPUs and TPUs using the numpyro and blackjax backends.
 
 See the API for the "fit" method for more details
 [here](https://bambinos.github.io/bambi/api/Model.html#bambi.Model.fit).
+
+### My sampler throws errors/indicates divergences, what should I do?
+
+* Divergences are a common issue in Bayesian modelling, and are usually not a problem as long as
+they are not prevalent. However, if you are seeing a lot of divergences, you may want
+to try 1) re-specifying your model, or 2) using a different sampler configuration; a common
+first step is shown in the sketch after this list.
+* If the sampler fails, this is likely an issue with model specification. Make sure you are using
+the correct priors for your model, and that you are not specifying a prior that is too
+strong (e.g. a prior that is too narrow), or one that does not match the data (e.g. a
+prior that doesn't cover the domain of the data, such as using a HalfNormal prior for a
+parameter that can be negative).
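+
+For divergences specifically, a common first step is to make the NUTS sampler take smaller,
+more careful steps by raising `target_accept`; keyword arguments to `fit` are forwarded to
+the PyMC sampler. A minimal sketch (the 0.95 threshold is a judgment call, and `data` stands
+in for your own DataFrame):
+
+```python
+import bambi as bmb
+
+model = bmb.Model("y ~ x", data)
+# Values closer to 1 force smaller NUTS steps, which often removes divergences
+results = model.fit(draws=2000, target_accept=0.95)
+```
+
+If divergences persist even with a high `target_accept`, that usually points back to the
+model specification itself (see the first bullet above).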
+
 
 ## Model Specification Questions
 
 ### My data has a non-normal distribution, can I still use Bambi?
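+
+Yes: non-normal responses are handled by choosing an appropriate `family` when building the
+model. A minimal sketch (assuming `"t"`, Bambi's Student-t family, for a heavy-tailed
+response; `data` is a placeholder DataFrame with columns `y` and `x`):
+
+```python
+import bambi as bmb
+
+# A Student-t response is more robust to outliers than the default Gaussian
+model = bmb.Model("y ~ x", data, family="t")
+results = model.fit()
+```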