Skip to content

Commit

Permalink
Merge branch 'main' into docs/update
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics committed Nov 30, 2024
2 parents fd1579c + 25a957e commit d0a255b
Show file tree
Hide file tree
Showing 45 changed files with 2,557 additions and 294 deletions.
30 changes: 29 additions & 1 deletion .github/workflows/build-conan-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,30 @@ jobs:
path: ${{ env.CONAN_HOME }}/p
lookup-only: true

# Writes script.sh, mounts it read-only into the $IMAGE container and runs it there to
# install the requested Conan version and update the URL of the conancenter remote.
# The shebang must be an absolute path: a relative "usr/bin/env" is resolved against the
# working directory of the process executing the script, so it only works when that is "/".
- name: Configure Conan
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
cat << 'EOF' | tee script.sh > /dev/null
#!/usr/bin/env bash
set -u
set -e
conan_version="$1"
PATH="/opt/python/cp312-cp312/bin:$PATH"
pip install "conan==$conan_version"
conan remote update conancenter --url https://center2.conan.io
EOF
chmod 755 script.sh
docker run \
-e "CONAN_HOME=$CONAN_HOME" \
-v "$PWD/script.sh:/tmp/script.sh:ro" \
-v "$CONAN_HOME:$CONAN_HOME" \
"$IMAGE" /tmp/script.sh '${{ inputs.conan-version }}'
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -269,7 +293,9 @@ jobs:

- name: Configure Conan
if: steps.cache-conan.outputs.cache-hit != 'true'
run: conan profile detect --force
run: |
conan profile detect --force
conan remote update conancenter --url https://center2.conan.io
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
Expand Down Expand Up @@ -380,6 +406,8 @@ jobs:
sed -i 's/compiler\.cppstd=.*/compiler.cppstd=${{ inputs.cppstd }}/' "$conan_profile"
conan remote update conancenter --url https://center2.conan.io
- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/fuzzy-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ jobs:
key: conan-${{ steps.cache-key.outputs.key }}
path: ${{ env.CONAN_HOME }}/p

# Updates the URL of the "conancenter" remote to https://center2.conan.io.
# Runs only when the cache-conan step did not report a cache hit.
- name: Configure Conan
if: steps.cache-conan.outputs.cache-hit != 'true'
run: conan remote update conancenter --url https://center2.conan.io

- name: Clean Conan cache (pre-build)
if: steps.cache-conan.outputs.cache-hit != 'true'
run: |
Expand Down Expand Up @@ -262,6 +266,17 @@ jobs:
*".${{ matrix.format }}" \
*.cool
# Runs test/scripts/fuzzer.py with "--format describe" on the files matching the matrix
# format and on *.cool. Resolution and duration come from the test-params step outputs,
# normalization comes from the job matrix, and --nproc is set to the value printed by nproc.
- name: Run test (describe)
run: |
test/scripts/fuzzer.py \
--resolution ${{ steps.test-params.outputs.resolution }} \
--duration '${{ steps.test-params.outputs.duration }}' \
--normalization ${{ matrix.normalization }} \
--nproc $(nproc) \
--format describe \
*".${{ matrix.format }}" \
*.cool
fuzzy-testing-status-check:
name: Status Check (fuzzy-testing)
if: ${{ always() }}
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Add requirements
run: python -m pip install --upgrade wheel setuptools
run: python -m pip install --upgrade conan wheel setuptools

- name: Generate cache key
id: cache-key
Expand All @@ -69,6 +69,9 @@ jobs:
echo "conan-key=pip-${{ matrix.os }}-$hash" >> $GITHUB_OUTPUT
# Updates the URL of the "conancenter" remote to https://center2.conan.io.
# This step has no "if" condition, so it is not gated on the Conan cache state.
- name: Configure Conan
run: conan remote update conancenter --url https://center2.conan.io

- name: Restore Conan cache
id: cache-conan
uses: actions/cache/restore@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ jobs:
fail-on-cache-miss: true

- name: Build wheels
uses: pypa/cibuildwheel@v2.21
uses: pypa/cibuildwheel@v2.22
with:
only: ${{ matrix.wheel-config }}
env:
Expand Down
12 changes: 6 additions & 6 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ def _min_cppstd(self):
return 17

def requirements(self):
self.requires("arrow/17.0.0#81be2aa6c49800df8cc163adf4b99e9f")
self.requires("boost/1.86.0#cd839a2082585255010f9e82eea94c7f", force=True)
self.requires("arrow/18.1.0#032d83f98246ca1d0facc6413141392e")
self.requires("boost/1.86.0#d6fc1753c34b475fc7d4c23bdb8143fb", force=True)
self.requires("bshoshany-thread-pool/4.1.0#be1802a8768416a6c9b1393cf0ce5e9c")
self.requires("concurrentqueue/1.0.4#1e48e1c712bcfd892087c9c622a51502")
self.requires("eigen/3.4.0#2e192482a8acff96fe34766adca2b24c")
self.requires("fast_float/6.1.5#e067b96a6271d1b4c255858ca9805bdd")
self.requires("fast_float/7.0.0#e4a4a338590ab5eaaf517c64607629d0")
self.requires("fmt/11.0.2#5c7438ef4d5d69ab106a41e460ce11f3", force=True)
self.requires("hdf5/1.14.4.3#df1467d7374938c231edbe10e83f2bb4", force=True)
self.requires("hdf5/1.14.5#51799cda2ba7acaa74c9651dea284ac4", force=True)
self.requires("highfive/2.10.0#3d1bd25944a57fa1bc30a0a22923d528")
self.requires("libdeflate/1.22#f95aebe763153ccbc4cc76c023e42e5a")
self.requires("parallel-hashmap/1.4.0#36ac84df77219748440cdb0f23624d56")
self.requires("readerwriterqueue/1.0.6#aaa5ff6fac60c2aee591e9e51b063b83")
self.requires("span-lite/0.11.0#519fd49fff711674cfed8cd17d4ed422")
self.requires("spdlog/1.14.1#972bbf70be1da4bc57ea589af0efde03")
self.requires("spdlog/1.15.0#da21f74dd84627fa68601c4e3b9c3f00")
self.requires("zstd/1.5.6#afefe79a309bc2a7b9f56c2093504c8b", force=True)

def validate(self):
Expand Down Expand Up @@ -93,7 +93,7 @@ def configure(self):
self.options["boost"].without_math = True
self.options["boost"].without_mpi = True
self.options["boost"].without_nowide = True
self.options["boost"].without_process = False
self.options["boost"].without_process = True
self.options["boost"].without_program_options = True
self.options["boost"].without_python = True
self.options["boost"].without_random = True
Expand Down
1 change: 1 addition & 0 deletions docs/api/cooler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Cooler API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
41 changes: 39 additions & 2 deletions docs/api/generic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Generic API

.. automethod:: __init__
.. automethod:: __getitem__
.. automethod:: attributes
.. automethod:: chromosomes
.. automethod:: is_hic
.. automethod:: is_mcool
.. automethod:: path
.. automethod:: resolutions

Expand All @@ -45,15 +48,49 @@ Generic API

.. automethod:: coord1
.. automethod:: coord2
.. automethod:: nnz
.. automethod:: sum
.. automethod:: to_arrow
.. automethod:: to_coo
.. automethod:: to_csr
.. automethod:: to_df
.. automethod:: to_numpy
.. automethod:: to_pandas

**Statistics**

:py:class:`hictkpy.PixelSelector` exposes methods to efficiently compute or estimate several statistics.

The main features of these methods are:

* All statistics are computed by traversing the data only once and without caching interactions.
* All methods can be tweaked to include or exclude non-finite values.
* All functions are implemented using short-circuiting to detect scenarios where the required statistics can be computed without traversing all pixels.

The following statistics are guaranteed to be exact:

* nnz
* sum
* min
* max
* mean

The rest of the supported statistics (currently variance, skewness, and kurtosis) are estimated and are thus not guaranteed to be exact.
However, in practice, the estimation is usually very accurate (relative error < 1.0e-6).

You can instruct hictkpy to compute the exact statistics by passing ``exact=True`` to :py:meth:`hictkpy.PixelSelector.describe()` and related methods.
It should be noted that for large queries this will result in slower computations and higher memory usage.

.. automethod:: describe
.. automethod:: kurtosis
.. automethod:: max
.. automethod:: mean
.. automethod:: min
.. automethod:: nnz
.. automethod:: skewness
.. automethod:: sum
.. automethod:: variance

**Iteration**

.. automethod:: __iter__

.. code-block:: ipythonconsole
Expand Down
1 change: 1 addition & 0 deletions docs/api/hic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Hi-C API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
4 changes: 4 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ find_package(
)

find_package(Arrow REQUIRED)
find_package(Boost REQUIRED)
find_package(FMT REQUIRED)
find_package(nanobind REQUIRED)
find_package(phmap REQUIRED)
find_package(spdlog REQUIRED)
find_package(Filesystem REQUIRED)

Expand Down Expand Up @@ -55,7 +57,9 @@ target_link_libraries(
hictk::hic
hictk::transformers
Arrow::arrow_$<IF:$<BOOL:${BUILD_SHARED_LIBS}>,shared,static>
Boost::headers
fmt::fmt-header-only
phmap
spdlog::spdlog_header_only
std::filesystem
)
Expand Down
8 changes: 5 additions & 3 deletions src/bin_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,8 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::vector<std::uint32_t> starts(n);
std::vector<std::uint32_t> ends(n);

const auto chrom_id_offset = static_cast<std::uint32_t>(_bins->chromosomes().at(0).is_all());

std::visit(
[&](const auto& bins) {
const auto [first_bin, last_bin] = !range.has_value()
Expand All @@ -403,16 +405,16 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::size_t i = 0;
std::for_each(first_bin, last_bin, [&](const auto& bin) {
bin_ids[i] = bin.id();
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id());
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id() - chrom_id_offset);
starts[i] = bin.start();
ends[i] = bin.end();
++i;
});
},
_bins->get());

return make_bin_table_df(chrom_names(), std::move(chrom_ids), std::move(starts), std::move(ends),
std::move(bin_ids));
return make_bin_table_df(chrom_names(false), std::move(chrom_ids), std::move(starts),
std::move(ends), std::move(bin_ids));
}

// Returns the shared pointer to the wrapped (read-only) hictk::BinTable.
std::shared_ptr<const hictk::BinTable> BinTable::get() const noexcept {
  return _bins;
}
Expand Down
34 changes: 26 additions & 8 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <hictk/type_traits.hpp>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -71,27 +73,37 @@ const hictk::Reference &CoolerFileWriter::chromosomes() const {
return ref;
}

std::shared_ptr<const hictk::BinTable> CoolerFileWriter::bins_ptr() const noexcept {
if (!_w) {
return {};
}

return _w->bins_ptr();
}

void CoolerFileWriter::add_pixels(const nb::object &df) {
if (!_w.has_value()) {
throw std::runtime_error(
"caught attempt to add_pixels to a .cool file that has already been finalized!");
}

const auto coo_format = nb::cast<bool>(df.attr("columns").attr("__contains__")("bin1_id"));

const auto cell_id = fmt::to_string(_w->cells().size());
auto attrs = hictk::cooler::Attributes::init(_w->resolution());
attrs.assembly = _w->attributes().assembly;

auto lck = std::make_optional<nb::gil_scoped_acquire>();
const auto coo_format = nb::cast<bool>(df.attr("columns").attr("__contains__")("bin1_id"));

const auto dtype = df.attr("__getitem__")("count").attr("dtype");
const auto dtype_str = nb::cast<std::string>(dtype.attr("__str__")());
const auto var = map_dtype_to_type(dtype_str);

std::visit(
[&](const auto &n) {
using N = hictk::remove_cvref_t<decltype(n)>;
using N = remove_cvref_t<decltype(n)>;
const auto pixels = coo_format ? coo_df_to_thin_pixels<N>(df, true)
: bg2_df_to_thin_pixels<N>(_w->bins(), df, true);
lck.reset();

auto clr = _w->create_cell<N>(cell_id, std::move(attrs),
hictk::cooler::DEFAULT_HDF5_CACHE_SIZE * 4, 1);
Expand All @@ -105,8 +117,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {
var);
}

void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
std::size_t chunk_size, std::size_t update_freq) {
hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_freq) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path));
Expand All @@ -126,7 +138,7 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
try {
std::visit(
[&](const auto &num) {
using N = hictk::remove_cvref_t<decltype(num)>;
using N = remove_cvref_t<decltype(num)>;
_w->aggregate<N>(_path.string(), false, _compression_lvl, chunk_size, update_freq);
},
_w->open("0").pixel_variant());
Expand All @@ -143,6 +155,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
_w.reset();
std::filesystem::remove(sclr_path); // NOLINT
// NOLINTEND(*-unchecked-optional-access)

return hictk::File{_path.string()};
}

hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path,
Expand Down Expand Up @@ -193,16 +207,20 @@ void CoolerFileWriter::bind(nb::module_ &m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
writer.def("bins", &get_bins_from_object<hictkpy::CoolerFileWriter>, "Get table of bins.",
nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move);

writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels,
nb::call_guard<nb::gil_scoped_release>(),
nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"),
"Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. "
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN",
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize,
nb::call_guard<nb::gil_scoped_release>(), nb::arg("log_lvl") = "WARN",
nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000,
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
// NOLINTEND(*-avoid-magic-numbers)
}
} // namespace hictkpy
Loading

0 comments on commit d0a255b

Please sign in to comment.