diff --git a/doc/usage_command_line.md b/doc/usage_command_line.md index b5f107f6..2e5418e3 100644 --- a/doc/usage_command_line.md +++ b/doc/usage_command_line.md @@ -1676,6 +1676,7 @@ SYNOPSIS [--out_format 01|b8|r8|ptb64|hits|dets] \ [--seed int] \ [--shots int] \ + [--skip_loop_folding] \ [--skip_reference_sample] DESCRIPTION @@ -1762,6 +1763,30 @@ OPTIONS Must be an integer between 0 and a quintillion (10^18). + --skip_loop_folding + Skips loop folding logic on the reference sample calculation. + + When this argument is specified, the reference sample (that is used + to convert measurement flip data from frame simulations into actual + measurement data) is generated by iterating through the entire + flattened circuit with no loop detection. + + Loop folding can enormously improve performance for circuits + containing REPEAT blocks with large repeat counts, by detecting + periodicity in loops and fast-forwarding across them when computing + the reference sample for the circuit. However, in some cases the + analysis is not able to detect the periodicity that is present. For + example, this has been observed in honeycomb code circuits. When + this happens, the folding-capable analysis is slower than simply + analyzing the flattened circuit without any specialized loop logic. + The `--skip_loop_folding` flag can be used to just analyze the + flattened circuit, bypassing this slowdown for circuits such as + honeycomb code circuits. + + By default, loop detection is enabled. Pass this flag to disable + it (when appropriate by use case). + + --skip_reference_sample Asserts the circuit can produce a noiseless sample that is just 0s. diff --git a/src/stim/cmd/command_sample.cc b/src/stim/cmd/command_sample.cc index 2d94acdc..01d85f59 100644 --- a/src/stim/cmd/command_sample.cc +++ b/src/stim/cmd/command_sample.cc @@ -21,18 +21,20 @@ #include "stim/simulators/tableau_simulator.h" #include "stim/util_bot/arg_parse.h" #include "stim/util_bot/probability_util.h" +#include "stim/util_top/reference_sample_tree.h" using namespace stim; int stim::command_sample(int argc, const char **argv) { check_for_unknown_arguments( - {"--seed", "--skip_reference_sample", "--out_format", "--out", "--in", "--shots"}, + {"--seed", "--skip_reference_sample", "--skip_loop_folding", "--out_format", "--out", "--in", "--shots"}, {"--sample", "--frame0"}, "sample", argc, argv); const auto &out_format = find_enum_argument("--out_format", "01", format_name_to_enum_map(), argc, argv); bool skip_reference_sample = find_bool_argument("--skip_reference_sample", argc, argv); + bool skip_loop_folding = find_bool_argument("--skip_loop_folding", argc, argv); uint64_t num_shots = find_argument("--shots", argc, argv) ? (uint64_t)find_int64_argument("--shots", 1, 0, INT64_MAX, argc, argv) : find_argument("--sample", argc, argv) ? (uint64_t)find_int64_argument("--sample", 1, 0, INT64_MAX, argc, argv) @@ -56,7 +58,13 @@ int stim::command_sample(int argc, const char **argv) { auto circuit = Circuit::from_file(in); simd_bits ref(0); if (!skip_reference_sample) { - ref = TableauSimulator::reference_sample_circuit(circuit); + if (skip_loop_folding) { + ref = TableauSimulator::reference_sample_circuit(circuit); + } else { + ReferenceSampleTree reference_sample_measurement_bits = + ReferenceSampleTree::from_circuit_reference_sample(circuit.aliased_noiseless_circuit()); + reference_sample_measurement_bits.decompress_into(ref); + } } sample_batch_measurements_writing_results_to_disk(circuit, ref, num_shots, out, out_format.id, rng); } @@ -128,6 +136,37 @@ SubCommandHelp stim::command_sample_help() { )PARAGRAPH"), }); + result.flags.push_back( + SubCommandHelpFlag{ + "--skip_loop_folding", + "bool", + "false", + {"[none]", "[switch]"}, + clean_doc_string(R"PARAGRAPH( + Skips loop folding logic on the reference sample calculation. + + When this argument is specified, the reference sample (that is used + to convert measurement flip data from frame simulations into actual + measurement data) is generated by iterating through the entire + flattened circuit with no loop detection. + + Loop folding can enormously improve performance for circuits + containing REPEAT blocks with large repeat counts, by detecting + periodicity in loops and fast-forwarding across them when computing + the reference sample for the circuit. However, in some cases the + analysis is not able to detect the periodicity that is present. For + example, this has been observed in honeycomb code circuits. When + this happens, the folding-capable analysis is slower than simply + analyzing the flattened circuit without any specialized loop logic. + The `--skip_loop_folding` flag can be used to just analyze the + flattened circuit, bypassing this slowdown for circuits such as + honeycomb code circuits. + + By default, loop detection is enabled. Pass this flag to disable + it (when appropriate by use case). + )PARAGRAPH"), + }); + result.flags.push_back( SubCommandHelpFlag{ "--out_format", diff --git a/src/stim/util_top/reference_sample_tree.h b/src/stim/util_top/reference_sample_tree.h index 92dfcf14..366f4e7d 100644 --- a/src/stim/util_top/reference_sample_tree.h +++ b/src/stim/util_top/reference_sample_tree.h @@ -37,6 +37,10 @@ struct ReferenceSampleTree { /// Writes the contents of the tree into the given output vector. void decompress_into(std::vector &output) const; + /// Writes the contents of the tree into the given output simd_bits. + template + void decompress_into(simd_bits &output) const; + /// Folds redundant children into the repetition count, if they repeat this many times. /// /// For example, if the tree's children are [A, B, C, A, B, C] and the tree has no diff --git a/src/stim/util_top/reference_sample_tree.inl b/src/stim/util_top/reference_sample_tree.inl index 94c52e27..670ad97c 100644 --- a/src/stim/util_top/reference_sample_tree.inl +++ b/src/stim/util_top/reference_sample_tree.inl @@ -2,6 +2,19 @@ namespace stim { +template +void ReferenceSampleTree::decompress_into(simd_bits &output) const { + std::vector v; + this->decompress_into(v); + + simd_bits result(v.size()); + for (size_t k = 0; k < v.size(); k++) { + result[k] ^= v[k]; + } + + output = std::move(result); +} + template ReferenceSampleTree CompressedReferenceSampleHelper::do_loop_with_no_folding(const Circuit &loop, uint64_t reps) { ReferenceSampleTree result;