Skip to content

Commit dfb53e1

Browse files
committed
Optimize stream processing by reducing memory allocation
1 parent e063063 commit dfb53e1

File tree

2 files changed

+23
-16
lines changed

2 files changed

+23
-16
lines changed

src/soxr_ext.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ static soxr_datatype_t to_soxr_split_dtype(const type_info& ntype) {
6161
class CSoxr {
6262
soxr_t _soxr = nullptr;
6363
const double _oi_rate;
64+
std::unique_ptr<uint8_t[]> _y_buf;
65+
size_t _y_buf_size = 0;
6466

6567
public:
6668
const double _in_rate;
@@ -121,12 +123,20 @@ class CSoxr {
121123

122124
const size_t ilen = x.shape(0);
123125

124-
// This is slower then allocating fixed `ilen * _oi_rate`.
125-
// But it insures lowest output delay provided by libsoxr.
126+
// This is slower than returning fixed `ilen * _oi_rate` buffers w/o copying.
127+
// But it ensures the lowest output delay provided by libsoxr.
126128
const size_t olen = soxr_delay(_soxr) + ilen * _oi_rate + 1;
127129

128-
// alloc
129-
y = new T[olen * channels] { 0 };
130+
// Reuse output buffer if possible, else reallocate
131+
size_t req_size = sizeof(T) * olen * channels;
132+
if (!_y_buf || _y_buf_size < req_size) {
133+
// Grow to next power of 2
134+
size_t new_size = 1;
135+
while (new_size < req_size) new_size <<= 1;
136+
_y_buf = std::make_unique<uint8_t[]>(new_size);
137+
_y_buf_size = new_size;
138+
}
139+
y = reinterpret_cast<T*>(_y_buf.get());
130140

131141
// divide long input and process
132142
size_t odone = 0;
@@ -150,15 +160,11 @@ class CSoxr {
150160
}
151161

152162
if (err) {
153-
delete[] y;
154163
throw std::runtime_error(err);
155164
}
156165

157-
// Delete 'y' when the 'owner' capsule expires
158-
nb::capsule owner(y, [](void *p) noexcept {
159-
delete[] (T *) p;
160-
});
161-
return ndarray<nb::numpy, T>(y, { out_pos, channels }, owner);
166+
// Return a copy
167+
return ndarray<nb::numpy, T>(y, { out_pos, channels }).cast();
162168
}
163169

164170
size_t num_clips() { return *soxr_num_clips(_soxr); }

tests/bench.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,15 @@
5656

5757
# soxr with clear()
5858
# It becomes faster than soxr.resample() when input length (=LEN) is short
59-
rs = soxr.ResampleStream(P, Q, sig.shape[1], dtype=sig.dtype, quality=QUALITY)
59+
if hasattr(soxr.ResampleStream, 'clear'):
60+
rs = soxr.ResampleStream(P, Q, sig.shape[1], dtype=sig.dtype, quality=QUALITY)
6061

61-
def soxr_with_reset():
62-
rs.clear()
63-
return rs.resample_chunk(sig, last=True)
62+
def soxr_with_reset():
63+
rs.clear()
64+
return rs.resample_chunk(sig, last=True)
6465

65-
t = timeit.timeit(soxr_with_reset, number=REPEAT)
66-
print(f'soxr w/ clear(): {t:f} (sec)')
66+
t = timeit.timeit(soxr_with_reset, number=REPEAT)
67+
print(f'soxr w/ clear(): {t:f} (sec)')
6768

6869

6970
# soxr stream chunk processing

0 commit comments

Comments
 (0)