diff --git a/.github/workflows/version_check.jl b/.github/workflows/version_check.jl
index 555c5758b..35e70bc42 100644
--- a/.github/workflows/version_check.jl
+++ b/.github/workflows/version_check.jl
@@ -1,6 +1,6 @@
 # Set up a temporary environment just to run this script
 using Pkg
-Pkg.activate(temp=true)
+Pkg.activate(temp = true)
 Pkg.add(["YAML", "TOML", "JSON", "HTTP"])
 import YAML
 import TOML
@@ -18,7 +18,10 @@ end
 
 function major_minor_patch_match(vs...)
     first = vs[1]
-    all(v.:major == first.:major && v.:minor == first.:minor && v.:patch == first.:patch for v in vs)
+    all(
+        v.:major == first.:major && v.:minor == first.:minor && v.:patch == first.:patch for
+        v in vs
+    )
 end
 
 """
@@ -34,7 +37,10 @@ function update_project_toml(filename, target_version::VersionNumber)
     open(filename, "w") do io
         for line in lines
             if occursin(r"^Turing\s*=\s*\"\d+\.\d+\"\s*$", line)
-                println(io, "Turing = \"$(target_version.:major).$(target_version.:minor)\"")
+                println(
+                    io,
+                    "Turing = \"$(target_version.:major).$(target_version.:minor)\"",
+                )
             else
                 println(io, line)
             end
@@ -54,7 +60,10 @@ function update_quarto_yml(filename, target_version::VersionNumber)
         for line in lines
             m = match(r"^(\s+)- text:\s*\"v\d+\.\d+\"\s*$", line)
             if m !== nothing
-                println(io, "$(m[1])- text: \"v$(target_version.:major).$(target_version.:minor)\"")
+                println(
+                    io,
+                    "$(m[1])- text: \"v$(target_version.:major).$(target_version.:minor)\"",
+                )
             else
                 println(io, line)
             end
@@ -108,7 +117,7 @@ if ENV["TARGET_IS_MAIN"] == "true"
     old_env = Pkg.project().path
     Pkg.activate(".")
     try
-        Pkg.add(name="Turing", version=latest_version)
+        Pkg.add(name = "Turing", version = latest_version)
     catch e
         # If the Manifest couldn't be updated, the error will be shown later
         println(e)
@@ -118,14 +127,20 @@ if ENV["TARGET_IS_MAIN"] == "true"
         manifest_toml = TOML.parsefile(MANIFEST_TOML_PATH)
         manifest_version = VersionNumber(manifest_toml["deps"]["Turing"][1]["version"])
         if !major_minor_patch_match(latest_version, manifest_version)
-            push!(errors, "Failed to update $(MANIFEST_TOML_PATH) to match latest Turing.jl version")
+            push!(
+                errors,
+                "Failed to update $(MANIFEST_TOML_PATH) to match latest Turing.jl version",
+            )
         end
     end
 
     if isempty(errors)
        println("All good")
    else
-        error("The following errors occurred during version checking: \n", join(errors, "\n"))
+        error(
+            "The following errors occurred during version checking: \n",
+            join(errors, "\n"),
+        )
    end
 
 else
@@ -135,10 +150,12 @@ else
     # work as it would involve paging through the list of releases). Instead,
     # we just check that the minor versions match.
     if !major_minor_match(quarto_version, project_version, manifest_version)
-        error("The minor versions of Turing.jl in _quarto.yml, Project.toml, and Manifest.toml are inconsistent:
-        - _quarto.yml: $quarto_version_str
-        - Project.toml: $project_version_str
-        - Manifest.toml: $manifest_version
-        ")
+        error(
+            "The minor versions of Turing.jl in _quarto.yml, Project.toml, and Manifest.toml are inconsistent:
+            - _quarto.yml: $quarto_version_str
+            - Project.toml: $project_version_str
+            - Manifest.toml: $manifest_version
+            ",
+        )
     end
 end
diff --git a/_quarto.yml b/_quarto.yml
index fbb6b868c..e9bba3b7b 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -24,6 +24,8 @@ website:
           text: Get Started
         - href: tutorials/coin-flipping/
           text: Tutorials
+        - href: faq/
+          text: FAQ
        - href: https://turinglang.org/library/
           text: Libraries
         - href: https://turinglang.org/news/
diff --git a/faq/index.qmd b/faq/index.qmd
new file mode 100644
index 000000000..b350730da
--- /dev/null
+++ b/faq/index.qmd
@@ -0,0 +1,137 @@
+---
+title: "Frequently Asked Questions"
+description: "Common questions and answers about using Turing.jl"
+---
+
+## Why is this variable being treated as random instead of observed?
+
+This is a common source of confusion. In Turing.jl, you can only condition or fix expressions that explicitly appear on the left-hand side (LHS) of a `~` statement.
+
+For example, if your model contains:
+```julia
+x ~ filldist(Normal(), 2)
+```
+
+You cannot directly condition on `x[2]` using `condition(model, @varname(x[2]) => 1.0)` because `x[2]` never appears on the LHS of a `~` statement. Only `x` as a whole appears there.
+
+However, there is an important exception: when you use the broadcasting operator `.~` with a univariate distribution, each element is treated as a separate draw from that distribution, allowing you to condition on individual elements:
+
+```julia
+using Turing
+
+@model function f1()
+    x = Vector{Float64}(undef, 3)
+    x .~ Normal()  # Each element is a separate draw
+end
+
+m1 = f1() | (@varname(x[1]) => 1.0)
+sample(m1, NUTS(), 100)  # This works!
+```
+
+In contrast, you cannot condition on parts of a multivariate distribution because it represents a single distribution over the entire vector:
+
+```julia
+using LinearAlgebra: I
+
+@model function f2()
+    x = Vector{Float64}(undef, 3)
+    x ~ MvNormal(zeros(3), I)  # Single multivariate distribution
+end
+
+m2 = f2() | (@varname(x[1]) => 1.0)
+sample(m2, NUTS(), 100)  # This doesn't work!
+```
+
+The key insight is that `filldist` creates a single distribution (not N independent distributions), which is why you cannot condition on individual elements. The distinction is not just about what appears on the LHS of `~`, but whether you're dealing with separate distributions (`.~` with univariate) or a single distribution over multiple values (`~` with multivariate or `filldist`).
+
+To understand more about how Turing determines whether a variable is treated as random or observed, see:
+- [Core Functionality](../core-functionality/) - basic explanation of the `~` notation and conditioning
+
+
+## Can I use parallelism / threads in my model?
+
+Yes, but with important caveats! There are two types of parallelism to consider:
+
+### 1. Parallel Sampling (Multiple Chains)
+Turing.jl fully supports sampling multiple chains in parallel:
+- **Multithreaded sampling**: Use `MCMCThreads()` to run one chain per thread
+- **Distributed sampling**: Use `MCMCDistributed()` for distributed computing
+
+See the [Core Functionality guide](../core-functionality/#sampling-multiple-chains) for details; a minimal sketch is shown below.
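+
+As a rough sketch (the coin-flip model, data, and chain count here are purely illustrative), multithreaded sampling of four chains looks like this:
+
+```julia
+using Turing
+
+@model function coinflip(y)
+    p ~ Beta(1, 1)
+    y .~ Bernoulli(p)
+end
+
+data = rand(Bool, 100)
+
+# One chain per thread; start Julia with several threads, e.g. `julia --threads=4`.
+chains = sample(coinflip(data), NUTS(), MCMCThreads(), 1000, 4)
+
+# For distributed sampling, set up workers first (`using Distributed; addprocs(4)`,
+# then `@everywhere using Turing`) and swap in MCMCDistributed():
+# chains = sample(coinflip(data), NUTS(), MCMCDistributed(), 1000, 4)
+```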
+
+### 2. Threading Within Models
+Using threads inside your model (e.g., `Threads.@threads`) requires more care:
+
+```julia
+@model function f()
+    x = Vector{Float64}(undef, 10)
+    Threads.@threads for i in eachindex(x)
+        x[i] ~ Normal()  # UNSAFE: Assume statements in threads can crash!
+    end
+end
+```
+
+**Important limitations:**
+- **Observe statements**: Generally safe to use in threaded loops
+- **Assume statements** (sampling statements): Often crash unpredictably or produce incorrect results
+- **AD backend compatibility**: Many AD backends don't support threading. Check the [multithreaded column in ADTests](https://turinglang.org/ADTests/) for compatibility
+
+For safe parallelism within models, consider vectorized operations instead of explicit threading.
+
+## How do I check the type stability of my Turing model?
+
+Type stability is crucial for performance. Check out:
+- [Performance Tips]({{< meta usage-performance-tips >}}) - includes specific advice on type stability
+- Use `DynamicPPL.DebugUtils.model_warntype` to check type stability of your model
+
+## How do I debug my Turing model?
+
+For debugging both statistical and syntactic issues:
+- [Troubleshooting Guide]({{< meta usage-troubleshooting >}}) - common errors and their solutions
+- For more advanced debugging, DynamicPPL provides `DynamicPPL.DebugUtils` for inspecting model internals
+
+## What are the main differences between Turing and Stan syntax?
+
+Key syntactic differences include:
+
+- **Parameter blocks**: Stan requires explicit `data`, `parameters`, `transformed parameters`, and `model` blocks. In Turing, everything is defined within the `@model` macro
+- **Variable declarations**: Stan requires upfront type declarations in parameter blocks. Turing infers types from the sampling statements
+- **Transformed data**: Stan has a `transformed data` block for preprocessing. In Turing, data transformations should be done before defining the model
+- **Generated quantities**: Stan has a `generated quantities` block. In Turing, use the approach described in [Tracking Extra Quantities]({{< meta usage-tracking-extra-quantities >}})
+
+Example comparison:
+```stan
+// Stan
+data {
+  int N;
+  vector[N] y;
+}
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+model {
+  mu ~ normal(0, 1);
+  sigma ~ normal(0, 1);
+  y ~ normal(mu, sigma);
+}
+```
+
+```julia
+# Turing
+@model function my_model(y)
+    mu ~ Normal(0, 1)
+    sigma ~ truncated(Normal(0, 1), 0, Inf)
+    y .~ Normal(mu, sigma)
+end
+```
+
+## Which automatic differentiation backend should I use?
+
+The choice of AD backend can significantly impact performance. See:
+- [Automatic Differentiation Guide]({{< meta usage-automatic-differentiation >}}) - comprehensive comparison of ForwardDiff, Mooncake, ReverseDiff, and other backends
+- [Performance Tips]({{< meta usage-performance-tips >}}#choose-your-ad-backend) - quick guide on choosing backends
+- [AD Backend Benchmarks](https://turinglang.org/ADTests/) - performance comparisons across various models
+
+## I changed one line of my model and now it's so much slower; why?
+
+Small changes can have big performance impacts. Common culprits include:
+- Type instability introduced by the change
+- Switching from vectorized to scalar operations (or vice versa)
+- Inadvertently causing AD backend incompatibilities
+- Breaking assumptions that allowed compiler optimizations
+
+See our [Performance Tips]({{< meta usage-performance-tips >}}) and [Troubleshooting Guide]({{< meta usage-troubleshooting >}}) for debugging performance regressions; a small sketch of checking type stability before and after such a change is shown below.
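+
+As an illustration (the two toy models below are made up for this FAQ, and we assume the `DynamicPPL.DebugUtils.model_warntype` utility mentioned above is called on a model instance), a single untyped container is enough to make a previously fast model type-unstable:
+
+```julia
+using Turing
+import DynamicPPL  # provides the DebugUtils tools mentioned above
+
+@model function fast_version(y)
+    mu ~ Normal(0, 1)
+    y .~ Normal(mu, 1)
+end
+
+@model function slow_version(y)
+    mu = Vector(undef, 1)  # eltype is Any, so downstream code cannot be inferred
+    mu[1] ~ Normal(0, 1)
+    y .~ Normal(mu[1], 1)
+end
+
+# Compare the inferred types of the two model bodies (analogous to @code_warntype):
+DynamicPPL.DebugUtils.model_warntype(fast_version(randn(10)))
+DynamicPPL.DebugUtils.model_warntype(slow_version(randn(10)))
+```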