From 78ff32fc6dc948c102d817c26ec49c8ffc6f5897 Mon Sep 17 00:00:00 2001 From: victor Date: Tue, 25 Mar 2025 10:04:49 +0800 Subject: [PATCH 01/15] BREAKING CHANGE: buffer expose as bytes instead of str --- c-questdb-client | 2 +- src/questdb/extra_cpython.pxd | 4 + src/questdb/ingress.pyi | 20 +- src/questdb/ingress.pyx | 22 +- src/questdb/line_sender.pxd | 11 +- test/test.py | 71 +++-- test/test_dataframe.py | 544 +++++++++++++++++----------------- 7 files changed, 341 insertions(+), 333 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index f97c4cb1..84b09b05 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit f97c4cb164ef357560a92c8681438df5f7452bb6 +Subproject commit 84b09b0572fc2372501cc911ce46ae2073398911 diff --git a/src/questdb/extra_cpython.pxd b/src/questdb/extra_cpython.pxd index 3e794566..e848814d 100644 --- a/src/questdb/extra_cpython.pxd +++ b/src/questdb/extra_cpython.pxd @@ -27,6 +27,10 @@ cdef extern from "Python.h": str PyUnicode_FromStringAndSize( const char* u, Py_ssize_t size) + # Ditto, see comment on why not returning a `PyObject` above. + object PyBytes_FromStringAndSize( + const char* u, Py_ssize_t size) + # Must be called before accessing data or is compact check. int PyUnicode_READY(PyObject* o) except -1 diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index cc642c06..9bd4a454 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -348,8 +348,8 @@ class Buffer: Equivalent (but cheaper) to ``len(str(sender))``. """ - def __str__(self) -> str: - """Return the constructed buffer as a string. Use for debugging.""" + def peek(self) -> bytes: + """Return the constructed buffer as bytes. Use for debugging.""" def row( self, @@ -941,20 +941,20 @@ class Sender: def __enter__(self) -> Sender: """Call :func:`Sender.establish` at the start of a ``with`` block.""" - def __str__(self) -> str: + def __len__(self) -> int: """ - Inspect the contents of the internal buffer. 
- - The ``str`` value returned represents the unsent data. + Number of bytes of unsent data in the internal buffer. - Also see :func:`Sender.__len__`. + Equivalent (but cheaper) to ``len(str(sender))``. """ - def __len__(self) -> int: + def peek(self) -> bytes: """ - Number of bytes of unsent data in the internal buffer. + Inspect the contents of the internal buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + The ``bytes`` value returned represents the unsent data. + + Also see :func:`Sender.__len__`. """ def transaction(self, table_name: str) -> SenderTransaction: diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 51f263db..88a67279 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -829,14 +829,16 @@ cdef class Buffer: """ return line_sender_buffer_size(self._impl) - def __str__(self) -> str: + def peek(self) -> bytes: """Return the constructed buffer as a string. Use for debugging.""" - return self._to_str() + return self._to_bytes() - cdef inline object _to_str(self): - cdef size_t size = 0 - cdef const char* utf8 = line_sender_buffer_peek(self._impl, &size) - return PyUnicode_FromStringAndSize(utf8, size) + cdef inline object _to_bytes(self): + cdef line_sender_buffer_view view = line_sender_buffer_peek(self._impl) + if view.len: + return PyBytes_FromStringAndSize(<const char*>view.buf, view.len) + else: + return b'' cdef inline void_int _set_marker(self) except -1: cdef line_sender_error* err = NULL @@ -2281,21 +2283,21 @@ cdef class Sender: self.establish() return self - def __str__(self) -> str: + def peek(self) -> bytes: """ Inspect the contents of the internal buffer. - The ``str`` value returned represents the unsent data. + The ``bytes`` value returned represents the unsent data. Also see :func:`Sender.__len__`. """ - return str(self._buffer) + return self._buffer.peek() def __len__(self) -> int: """ Number of bytes of unsent data in the internal buffer. - Equivalent (but cheaper) to ``len(str(sender))``. 
+ Equivalent (but cheaper) to ``len(sender.peek())``. """ return len(self._buffer) diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index 50490ab9..d5759fa3 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -22,7 +22,7 @@ ## ################################################################################ -from libc.stdint cimport int64_t, uint16_t, uint64_t +from libc.stdint cimport int64_t, uint16_t, uint64_t, uint8_t cdef extern from "questdb/ingress/line_sender.h": cdef struct line_sender_error: @@ -102,6 +102,10 @@ cdef extern from "questdb/ingress/line_sender.h": size_t len const char* buf + cdef struct line_sender_buffer_view: + size_t len + const uint8_t* buf + bint line_sender_column_name_init( line_sender_column_name* name, size_t len, @@ -171,9 +175,8 @@ cdef extern from "questdb/ingress/line_sender.h": const line_sender_buffer* buffer ) noexcept nogil - const char* line_sender_buffer_peek( - const line_sender_buffer* buffer, - size_t* len_out + line_sender_buffer_view line_sender_buffer_peek( + const line_sender_buffer* buffer ) noexcept nogil bint line_sender_buffer_table( diff --git a/test/test.py b/test/test.py index a5ee8706..b537ee67 100755 --- a/test/test.py +++ b/test/test.py @@ -76,7 +76,7 @@ def test_basic(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) self.assertEqual(len(buf), 25) - self.assertEqual(str(buf), 'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(buf.peek(), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_table(self): buf = qi.Buffer() @@ -92,7 +92,7 @@ def test_bad_table(self): def test_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), 'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(buf.peek(), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_symbol_column_name(self): buf = qi.Buffer() @@ -121,38 +121,38 @@ def test_column(self): 'col7': 
two_h_after_epoch, 'col8': None}, at=qi.ServerTimestamp) exp = ( - 'tbl1 col1=t,col2=f,col3=-1i,col4=0.5,' - 'col5="val",col6=12345t,col7=7200000000t\n') - self.assertEqual(str(buf), exp) + b'tbl1 col1=t,col2=f,col3=-1i,col4=0.5,' + b'col5="val",col6=12345t,col7=7200000000t\n') + self.assertEqual(buf.peek(), exp) def test_none_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': None}, at=qi.ServerTimestamp) - exp = 'tbl1,sym1=val1\n' - self.assertEqual(str(buf), exp) + exp = b'tbl1,sym1=val1\n' + self.assertEqual(buf.peek(), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. buf.row('tbl1', symbols={'sym1': None, 'sym2': None}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), exp) + self.assertEqual(buf.peek(), exp) self.assertEqual(len(buf), len(exp)) def test_none_column(self): buf = qi.Buffer() buf.row('tbl1', columns={'col1': 1}, at=qi.ServerTimestamp) - exp = 'tbl1 col1=1i\n' - self.assertEqual(str(buf), exp) + exp = b'tbl1 col1=1i\n' + self.assertEqual(buf.peek(), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. buf.row('tbl1', columns={'col1': None, 'col2': None}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), exp) + self.assertEqual(buf.peek(), exp) self.assertEqual(len(buf), len(exp)) def test_no_symbol_or_col_args(self): buf = qi.Buffer() buf.row('table_name', at=qi.ServerTimestamp) - self.assertEqual(str(buf), '') + self.assertEqual(buf.peek(), b'') def test_unicode(self): buf = qi.Buffer() @@ -171,15 +171,15 @@ def test_unicode(self): 'questdb2': '嚜꓂', # UCS-2, 3 bytes for UTF-8. 'questdb3': '💩🦞'}, at=qi.ServerTimestamp) # UCS-4, 4 bytes for UTF-8. 
- self.assertEqual(str(buf), - f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + + self.assertEqual(buf.peek(), + (f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + 'tbl1,Questo\\ è\\ il\\ nome\\ di\\ una\\ colonna=' + 'Це\\ символьне\\ значення ' + - 'questdb1="",questdb2="嚜꓂",questdb3="💩🦞"\n') + 'questdb1="",questdb2="嚜꓂",questdb3="💩🦞"\n').encode('utf-8')) buf.clear() buf.row('tbl1', symbols={'questdb1': 'q❤️p'}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), 'tbl1,questdb1=q❤️p\n') + self.assertEqual(buf.peek(), 'tbl1,questdb1=q❤️p\n'.encode('utf-8')) # A bad char in Python. with self.assertRaisesRegex( @@ -191,30 +191,30 @@ def test_unicode(self): # Ensure we can continue using the buffer after an error. buf.row('tbl1', symbols={'questdb1': 'another line of input'}, at=qi.ServerTimestamp) self.assertEqual( - str(buf), - 'tbl1,questdb1=q❤️p\n' + + buf.peek(), + ('tbl1,questdb1=q❤️p\n' + # Note: No partially written failed line here. - 'tbl1,questdb1=another\\ line\\ of\\ input\n') + 'tbl1,questdb1=another\\ line\\ of\\ input\n').encode('utf-8')) def test_float(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 1.2345678901234567}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 num=1.2345678901234567\n') + self.assertEqual(buf.peek(), f'tbl1 num=1.2345678901234567\n'.encode('utf-8')) def test_int_range(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 0}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 num=0i\n') + self.assertEqual(buf.peek(), f'tbl1 num=0i\n'.encode('utf-8')) buf.clear() # 32-bit int range. buf.row('tbl1', columns={'min': -2 ** 31, 'max': 2 ** 31 - 1}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 min=-2147483648i,max=2147483647i\n') + self.assertEqual(buf.peek(), f'tbl1 min=-2147483648i,max=2147483647i\n'.encode('utf-8')) buf.clear() # 64-bit int range. 
buf.row('tbl1', columns={'min': -2 ** 63, 'max': 2 ** 63 - 1}, at=qi.ServerTimestamp) - self.assertEqual(str(buf), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n') + self.assertEqual(buf.peek(), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n'.encode('utf-8')) buf.clear() # Overflow. @@ -356,9 +356,9 @@ def test_flush_1(self): server.accept() with self.assertRaisesRegex(qi.IngressError, 'Column names'): sender.row('tbl1', symbols={'...bad name..': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(str(sender), '') + self.assertEqual(sender.peek(), b'') sender.flush() - self.assertEqual(str(sender), '') + self.assertEqual(sender.peek(), b'') msgs = server.recv() self.assertEqual(msgs, []) @@ -423,7 +423,7 @@ def test_two_rows_explicit_buffer(self): exp = ( 'line_sender_buffer_example2,id=Hola price="111222233333i",qty=3.5 111222233333\n' 'line_sender_example,id=Adios price="111222233343i",qty=2.5 111222233343\n') - self.assertEqual(str(buffer), exp) + self.assertEqual(buffer.peek(), exp.encode('utf-8')) sender.flush(buffer) msgs = server.recv() bexp = [msg.encode('utf-8') for msg in exp.rstrip().split('\n')] @@ -432,9 +432,8 @@ def test_two_rows_explicit_buffer(self): def test_independent_buffer(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) - exp = 'tbl1,sym1=val1\n' - bexp = exp[:-1].encode('utf-8') - self.assertEqual(str(buf), exp) + exp = b'tbl1,sym1=val1\n' + self.assertEqual(buf.peek(), exp) with Server() as server1, Server() as server2: with self.builder('tcp', 'localhost', server1.port) as sender1, \ @@ -443,21 +442,21 @@ def test_independent_buffer(self): server2.accept() sender1.flush(buf, clear=False) - self.assertEqual(str(buf), exp) + self.assertEqual(buf.peek(), exp) sender2.flush(buf, clear=False) - self.assertEqual(str(buf), exp) + self.assertEqual(buf.peek(), exp) msgs1 = server1.recv() msgs2 = server2.recv() - self.assertEqual(msgs1, [bexp]) - self.assertEqual(msgs2, [bexp]) + 
self.assertEqual(msgs1, [exp[:-1]]) + self.assertEqual(msgs2, [exp[:-1]]) sender1.flush(buf) - self.assertEqual(server1.recv(), [bexp]) + self.assertEqual(server1.recv(), [exp[:-1]]) # The buffer is now auto-cleared. - self.assertEqual(str(buf), '') + self.assertEqual(buf.peek(), b'') def test_auto_flush_settings_defaults(self): for protocol in ('tcp', 'tcps', 'http', 'https'): @@ -560,7 +559,7 @@ def test_dont_flush_on_exception(self): with self.builder('tcp', 'localhost', server.port) as sender: server.accept() sender.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(str(sender), 'tbl1,sym1=val1\n') + self.assertEqual(sender.peek(), b'tbl1,sym1=val1\n') raise RuntimeError('Test exception') msgs = server.recv() self.assertEqual(msgs, []) diff --git a/test/test_dataframe.py b/test/test_dataframe.py index cbd082e0..b4095046 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -35,7 +35,7 @@ def _dataframe(*args, **kwargs): buf = qi.Buffer() buf.dataframe(*args, **kwargs) - return str(buf) + return buf.peek() DF1 = pd.DataFrame({ 'A': [1.0, 2.0, 3.0], @@ -165,17 +165,17 @@ def test_bad_at(self): def test_empty_dataframe(self): buf = _dataframe(pd.DataFrame(), table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_zero_row_dataframe(self): buf = _dataframe(pd.DataFrame(columns=['A', 'B']), table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_zero_column_dataframe(self): df = pd.DataFrame(index=[0, 1, 2]) self.assertEqual(len(df), 3) buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual(buf, '') + self.assertEqual(buf, b'') def test_basic(self): buf = _dataframe( @@ -185,9 +185,9 @@ def test_basic(self): at=-1) self.assertEqual( buf, - 't1,A=a1,B=b1,C=b1,D=a1 E=1.0,F=1i 1520640000000000000\n' + - 't2,A=a2,D=a2 E=2.0,F=2i 1520726400000000000\n' + - 't1,A=a3,B=b3,C=b3,D=a3 E=3.0,F=3i 
1520812800000000000\n') + b't1,A=a1,B=b1,C=b1,D=a1 E=1.0,F=1i 1520640000000000000\n' + + b't2,A=a2,D=a2 E=2.0,F=2i 1520726400000000000\n' + + b't1,A=a3,B=b3,C=b3,D=a3 E=3.0,F=3i 1520812800000000000\n') def test_named_dataframe(self): df = pd.DataFrame({ @@ -197,23 +197,23 @@ def test_named_dataframe(self): buf = _dataframe(df, at=qi.ServerTimestamp) self.assertEqual( buf, - 'table_name a=1i,b="a"\n' + - 'table_name a=2i,b="b"\n' + - 'table_name a=3i,b="c"\n') + b'table_name a=1i,b="a"\n' + + b'table_name a=2i,b="b"\n' + + b'table_name a=3i,b="c"\n') buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n') buf = _dataframe(df, table_name_col='b', at=qi.ServerTimestamp) self.assertEqual( buf, - 'a a=1i\n' + - 'b a=2i\n' + - 'c a=3i\n') + b'a a=1i\n' + + b'b a=2i\n' + + b'c a=3i\n') df.index.name = 42 # bad type, not str with self.assertRaisesRegex(qi.IngressError, @@ -244,9 +244,9 @@ def test_at_good(self): buf = _dataframe(df, table_name='tbl1', at=ts) self.assertEqual( buf, - 'tbl1 a=1i,b="a" 1520640000000000000\n' + - 'tbl1 a=2i,b="b" 1520640000000000000\n' + - 'tbl1 a=3i,b="c" 1520640000000000000\n') + b'tbl1 a=1i,b="a" 1520640000000000000\n' + + b'tbl1 a=2i,b="b" 1520640000000000000\n' + + b'tbl1 a=3i,b="c" 1520640000000000000\n') @unittest.skipIf(BROKEN_TIMEZONES, 'requires accurate timezones') def test_at_neg(self): @@ -281,9 +281,9 @@ def test_at_ts_0(self): buf = _dataframe(df, table_name='tbl1', at=ts) self.assertEqual( buf, - 'tbl1 a=1i,b="a" 0\n' + - 'tbl1 a=2i,b="b" 0\n' + - 'tbl1 a=3i,b="c" 0\n') + b'tbl1 a=1i,b="a" 0\n' + + b'tbl1 a=2i,b="b" 0\n' + + b'tbl1 a=3i,b="c" 0\n') def test_single_at_col(self): df = pd.DataFrame({'timestamp': pd.to_datetime(['2023-01-01'])}) @@ -306,11 +306,11 @@ def test_u8_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) 
self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=255i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=255i\n') def test_i8_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -321,12 +321,12 @@ def test_i8_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-128i\n' + - 'tbl1 a=127i\n' + - 'tbl1 a=0i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-128i\n' + + b'tbl1 a=127i\n' + + b'tbl1 a=0i\n') def test_u16_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -337,11 +337,11 @@ def test_u16_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=65535i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=65535i\n') def test_i16_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -352,12 +352,12 @@ def test_i16_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-32768i\n' + - 'tbl1 a=32767i\n' + - 'tbl1 a=0i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-32768i\n' + + b'tbl1 a=32767i\n' + + b'tbl1 a=0i\n') def test_u32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -368,11 +368,11 @@ def test_u32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=4294967295i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=4294967295i\n') def test_i32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -384,12 +384,12 @@ def 
test_i32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-2147483648i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=2147483647i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-2147483648i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=2147483647i\n') def test_u64_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -400,20 +400,20 @@ def test_u64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=9223372036854775807i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=9223372036854775807i\n') buf = qi.Buffer() buf.dataframe(pd.DataFrame({'b': [.5, 1.0, 1.5]}), table_name='tbl2', at=qi.ServerTimestamp) exp1 = ( - 'tbl2 b=0.5\n' + - 'tbl2 b=1.0\n' + - 'tbl2 b=1.5\n') + b'tbl2 b=0.5\n' + + b'tbl2 b=1.0\n' + + b'tbl2 b=1.5\n') self.assertEqual( - str(buf), + buf.peek(), exp1) df2 = pd.DataFrame({'a': pd.Series([ 1, 2, 3, @@ -426,7 +426,7 @@ def test_u64_numpy_col(self): buf.dataframe(df2, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( - str(buf), + buf.peek(), exp1) # No partial write of `df2`. 
def test_i64_numpy_col(self): @@ -439,12 +439,12 @@ def test_i64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i\n' + - 'tbl1 a=2i\n' + - 'tbl1 a=3i\n' + - 'tbl1 a=-9223372036854775808i\n' + - 'tbl1 a=0i\n' + - 'tbl1 a=9223372036854775807i\n') + b'tbl1 a=1i\n' + + b'tbl1 a=2i\n' + + b'tbl1 a=3i\n' + + b'tbl1 a=-9223372036854775808i\n' + + b'tbl1 a=0i\n' + + b'tbl1 a=9223372036854775807i\n') def test_f32_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -458,14 +458,14 @@ def test_f32_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0\n' + - 'tbl1 a=2.0\n' + - 'tbl1 a=3.0\n' + - 'tbl1 a=0.0\n' + - 'tbl1 a=Infinity\n' + - 'tbl1 a=-Infinity\n' + - 'tbl1 a=NaN\n' + - 'tbl1 a=3.4028234663852886e38\n') + b'tbl1 a=1.0\n' + + b'tbl1 a=2.0\n' + + b'tbl1 a=3.0\n' + + b'tbl1 a=0.0\n' + + b'tbl1 a=Infinity\n' + + b'tbl1 a=-Infinity\n' + + b'tbl1 a=NaN\n' + + b'tbl1 a=3.4028234663852886e38\n') def test_f64_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -479,14 +479,14 @@ def test_f64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0\n' + - 'tbl1 a=2.0\n' + - 'tbl1 a=3.0\n' + - 'tbl1 a=0.0\n' + - 'tbl1 a=Infinity\n' + - 'tbl1 a=-Infinity\n' + - 'tbl1 a=NaN\n' + - 'tbl1 a=1.7976931348623157e308\n') + b'tbl1 a=1.0\n' + + b'tbl1 a=2.0\n' + + b'tbl1 a=3.0\n' + + b'tbl1 a=0.0\n' + + b'tbl1 a=Infinity\n' + + b'tbl1 a=-Infinity\n' + + b'tbl1 a=NaN\n' + + b'tbl1 a=1.7976931348623157e308\n') def test_u8_arrow_col(self): df = pd.DataFrame({ @@ -500,12 +500,12 @@ def test_u8_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=255i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 
a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=255i,b="f"\n') def test_i8_arrow_col(self): df = pd.DataFrame({ @@ -520,13 +520,13 @@ def test_i8_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-128i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=127i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-128i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=127i,b="g"\n') def test_u16_arrow_col(self): df = pd.DataFrame({ @@ -540,12 +540,12 @@ def test_u16_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + + ('tbl1 a=1i,b="a"\n' + 'tbl1 a=2i,b="b"\n' + 'tbl1 a=3i,b="c"\n' + 'tbl1 a=0i,b="d"\n' + 'tbl1 b="e"\n' + - 'tbl1 a=65535i,b="f"\n') + 'tbl1 a=65535i,b="f"\n').encode('utf-8')) def test_i16_arrow_col(self): df = pd.DataFrame({ @@ -560,13 +560,13 @@ def test_i16_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-32768i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=32767i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-32768i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=32767i,b="g"\n') def test_u32_arrow_col(self): df = pd.DataFrame({ @@ -580,12 +580,12 @@ def test_u32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=4294967295i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + 
b'tbl1 a=4294967295i,b="f"\n') def test_i32_arrow_col(self): df = pd.DataFrame({ @@ -600,13 +600,13 @@ def test_i32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-2147483648i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=2147483647i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-2147483648i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=2147483647i,b="g"\n') def test_u64_arrow_col(self): df = pd.DataFrame({ @@ -620,12 +620,12 @@ def test_u64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=0i,b="d"\n' + - 'tbl1 b="e"\n' + - 'tbl1 a=9223372036854775807i,b="f"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=9223372036854775807i,b="f"\n') df2 = pd.DataFrame({'a': pd.Series([ 1, 2, 3, @@ -650,13 +650,13 @@ def test_i64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1i,b="a"\n' + - 'tbl1 a=2i,b="b"\n' + - 'tbl1 a=3i,b="c"\n' + - 'tbl1 a=-9223372036854775808i,b="d"\n' + - 'tbl1 a=0i,b="e"\n' + - 'tbl1 b="f"\n' + - 'tbl1 a=9223372036854775807i,b="g"\n') + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-9223372036854775808i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=9223372036854775807i,b="g"\n') def test_f32_arrow_col(self): df = pd.DataFrame({ @@ -673,15 +673,15 @@ def test_f32_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0,b="a"\n' + - 'tbl1 a=2.0,b="b"\n' + - 'tbl1 a=3.0,b="c"\n' + - 'tbl1 a=0.0,b="d"\n' + - 'tbl1 a=Infinity,b="e"\n' 
+ - 'tbl1 a=-Infinity,b="f"\n' + - 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. - 'tbl1 a=3.4028234663852886e38,b="h"\n' + - 'tbl1 b="i"\n') + b'tbl1 a=1.0,b="a"\n' + + b'tbl1 a=2.0,b="b"\n' + + b'tbl1 a=3.0,b="c"\n' + + b'tbl1 a=0.0,b="d"\n' + + b'tbl1 a=Infinity,b="e"\n' + + b'tbl1 a=-Infinity,b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. + b'tbl1 a=3.4028234663852886e38,b="h"\n' + + b'tbl1 b="i"\n') def test_f64_arrow_col(self): df = pd.DataFrame({ @@ -698,15 +698,15 @@ def test_f64_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1.0,b="a"\n' + - 'tbl1 a=2.0,b="b"\n' + - 'tbl1 a=3.0,b="c"\n' + - 'tbl1 a=0.0,b="d"\n' + - 'tbl1 a=Infinity,b="e"\n' + - 'tbl1 a=-Infinity,b="f"\n' + - 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. - 'tbl1 a=1.7976931348623157e308,b="h"\n' + - 'tbl1 b="i"\n') + b'tbl1 a=1.0,b="a"\n' + + b'tbl1 a=2.0,b="b"\n' + + b'tbl1 a=3.0,b="c"\n' + + b'tbl1 a=0.0,b="d"\n' + + b'tbl1 a=Infinity,b="e"\n' + + b'tbl1 a=-Infinity,b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. 
+ b'tbl1 a=1.7976931348623157e308,b="h"\n' + + b'tbl1 b="i"\n') def test_bool_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -716,12 +716,12 @@ def test_bool_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') def test_bool_arrow_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -733,18 +733,18 @@ def test_bool_arrow_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=t\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n') df2 = pd.DataFrame({'a': pd.Series([ True, False, False, @@ -763,12 +763,12 @@ def test_bool_obj_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=t\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=f\n' + - 'tbl1 a=t\n' + - 'tbl1 a=f\n') + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') df2 = pd.DataFrame({'a': pd.Series([ True, False, 'false'], @@ -803,15 +803,15 @@ def test_datetime64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=1546300800000000t,b="a"\n' + - 'tbl1 a=1546300801000000t,b="b"\n' + - 'tbl1 a=1546300802000000t,b="c"\n' + - 'tbl1 a=1546300803000000t,b="d"\n' + - 'tbl1 a=1546300804000000t,b="e"\n' + - 'tbl1 a=1546300805000000t,b="f"\n' + - 'tbl1 b="g"\n' + - 'tbl1 b="h"\n' + - 'tbl1 b="i"\n') 
+ b'tbl1 a=1546300800000000t,b="a"\n' + + b'tbl1 a=1546300801000000t,b="b"\n' + + b'tbl1 a=1546300802000000t,b="c"\n' + + b'tbl1 a=1546300803000000t,b="d"\n' + + b'tbl1 a=1546300804000000t,b="e"\n' + + b'tbl1 a=1546300805000000t,b="f"\n' + + b'tbl1 b="g"\n' + + b'tbl1 b="h"\n' + + b'tbl1 b="i"\n') df = pd.DataFrame({'a': pd.Series([ pd.Timestamp('1970-01-01 00:00:00'), @@ -820,9 +820,9 @@ def test_datetime64_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a=0t\n' + - 'tbl1 a=1000000t\n' + - 'tbl1 a=2000000t\n') + b'tbl1 a=0t\n' + + b'tbl1 a=1000000t\n' + + b'tbl1 a=2000000t\n') def test_datetime64_tz_arrow_col(self): df = pd.DataFrame({ @@ -842,10 +842,10 @@ def test_datetime64_tz_arrow_col(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. - 'tbl1,b=sym1 a=1546318800000000t\n' + - 'tbl1,b=sym2 a=1546318801000000t\n' + - 'tbl1,b=sym3\n' + - 'tbl1,b=sym4 a=1546318803000000t\n') + b'tbl1,b=sym1 a=1546318800000000t\n' + + b'tbl1,b=sym2 a=1546318801000000t\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 a=1546318803000000t\n') # Not epoch 0. df = pd.DataFrame({ @@ -864,9 +864,9 @@ def test_datetime64_tz_arrow_col(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. - 'tbl1,b=sym1 a=18000000000t\n' + - 'tbl1,b=sym2 a=18001000000t\n' + - 'tbl1,b=sym3 a=18002000000t\n') + b'tbl1,b=sym1 a=18000000000t\n' + + b'tbl1,b=sym2 a=18001000000t\n' + + b'tbl1,b=sym3 a=18002000000t\n') # Actual epoch 0. 
df = pd.DataFrame({ @@ -884,9 +884,9 @@ def test_datetime64_tz_arrow_col(self): buf = _dataframe(df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,b=sym1 a=0t\n' + - 'tbl1,b=sym2 a=1000000t\n' + - 'tbl1,b=sym3 a=2000000t\n') + b'tbl1,b=sym1 a=0t\n' + + b'tbl1,b=sym2 a=1000000t\n' + + b'tbl1,b=sym3 a=2000000t\n') df2 = pd.DataFrame({ 'a': [ @@ -900,8 +900,8 @@ def test_datetime64_tz_arrow_col(self): # Mostly, here assert that negative timestamps are allowed. self.assertIn( buf, - ['tbl1,b=sym1 a=-2208970800000000t\n', - 'tbl1,b=sym1 a=-2208971040000000t\n']) + [b'tbl1,b=sym1 a=-2208970800000000t\n', + b'tbl1,b=sym1 a=-2208971040000000t\n']) def test_datetime64_numpy_at(self): df = pd.DataFrame({ @@ -920,15 +920,15 @@ def test_datetime64_numpy_at(self): buf = _dataframe(df, table_name='tbl1', at='a') self.assertEqual( buf, - 'tbl1 b=1i 1546300800000000000\n' + - 'tbl1 b=2i 1546300801000000000\n' + - 'tbl1 b=3i 1546300802000000000\n' + - 'tbl1 b=4i 1546300803000000000\n' + - 'tbl1 b=5i 1546300804000000000\n' + - 'tbl1 b=6i 1546300805000000000\n' + - 'tbl1 b=7i\n' + - 'tbl1 b=8i\n' + - 'tbl1 b=9i\n') + b'tbl1 b=1i 1546300800000000000\n' + + b'tbl1 b=2i 1546300801000000000\n' + + b'tbl1 b=3i 1546300802000000000\n' + + b'tbl1 b=4i 1546300803000000000\n' + + b'tbl1 b=5i 1546300804000000000\n' + + b'tbl1 b=6i 1546300805000000000\n' + + b'tbl1 b=7i\n' + + b'tbl1 b=8i\n' + + b'tbl1 b=9i\n') df = pd.DataFrame({ 'a': pd.Series([ @@ -940,9 +940,9 @@ def test_datetime64_numpy_at(self): buf = _dataframe(df, table_name='tbl1', at='a') self.assertEqual( buf, - 'tbl1 b=1i 0\n' + - 'tbl1 b=2i 1000000000\n' + - 'tbl1 b=3i 2000000000\n') + b'tbl1 b=1i 0\n' + + b'tbl1 b=2i 1000000000\n' + + b'tbl1 b=3i 2000000000\n') def test_datetime64_tz_arrow_at(self): df = pd.DataFrame({ @@ -962,10 +962,10 @@ def test_datetime64_tz_arrow_at(self): self.assertEqual( buf, # Note how these are 5hr offset from `test_datetime64_numpy_col`. 
- 'tbl1,b=sym1 1546318800000000000\n' + - 'tbl1,b=sym2 1546318801000000000\n' + - 'tbl1,b=sym3\n' + - 'tbl1,b=sym4 1546318803000000000\n') + b'tbl1,b=sym1 1546318800000000000\n' + + b'tbl1,b=sym2 1546318801000000000\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 1546318803000000000\n') df2 = pd.DataFrame({ 'a': [ @@ -991,11 +991,11 @@ def _test_pyobjstr_table(self, dtype): buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) self.assertEqual( buf, - 'a b=1i\n' + + ('a b=1i\n' + ('b' * 127) + ' b=2i\n' + 'q❤️p b=3i\n' + '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n') + '💩🦞 b=5i\n').encode("utf-8")) with self.assertRaisesRegex( qi.IngressError, "Too long"): @@ -1063,8 +1063,8 @@ def test_obj_string_table(self): '.': pd.Series(['x', 42], dtype='string'), 'z': [1, 2]}), table_name_col='.', at=qi.ServerTimestamp), - 'x z=1i\n' + - '42 z=2i\n') + b'x z=1i\n' + + b'42 z=2i\n') def _test_pyobjstr_numpy_symbol(self, dtype): df = pd.DataFrame({'a': pd.Series([ @@ -1080,14 +1080,14 @@ def _test_pyobjstr_numpy_symbol(self, dtype): buf = _dataframe(df, table_name='tbl1', symbols=True, at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,a=a\n' + + ('tbl1,a=a\n' + 'tbl1,a=q❤️p\n' + 'tbl1,a=' + ('❤️' * 1200) + '\n' + 'tbl1,a=Questo\\ è\\ un\\ qualcosa\n' + 'tbl1,a=щось\n' + 'tbl1,a=\n' + 'tbl1,a=嚜꓂\n' + - 'tbl1,a=💩🦞\n') + 'tbl1,a=💩🦞\n').encode("utf-8")) for null_obj in (None, float('nan'), pd.NA): self.assertEqual( @@ -1096,8 +1096,8 @@ def _test_pyobjstr_numpy_symbol(self, dtype): 'x': pd.Series(['a', null_obj], dtype=dtype), 'y': [1, 2]}), table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - 'tbl1,x=a y=1i\n' + - 'tbl1 y=2i\n') + b'tbl1,x=a y=1i\n' + + b'tbl1 y=2i\n') def test_obj_str_numpy_symbol(self): self._test_pyobjstr_numpy_symbol('object') @@ -1119,8 +1119,8 @@ def test_obj_string_numpy_symbol(self): 'x': pd.Series(['x', 42], dtype='string'), 'y': [1, 2]}), table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - 'tbl1,x=x y=1i\n' + - 'tbl1,x=42 y=2i\n') + b'tbl1,x=x y=1i\n' + + 
b'tbl1,x=42 y=2i\n') def test_str_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -1136,14 +1136,14 @@ def test_str_numpy_col(self): buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a="a"\n' + + ('tbl1 a="a"\n' + 'tbl1 a="q❤️p"\n' + 'tbl1 a="' + ('❤️' * 1200) + '"\n' + 'tbl1 a="Questo è un qualcosa"\n' + 'tbl1 a="щось"\n' + 'tbl1 a=""\n' + 'tbl1 a="嚜꓂"\n' + - 'tbl1 a="💩🦞"\n') + 'tbl1 a="💩🦞"\n').encode("utf-8")) def test_str_arrow_table(self): df = pd.DataFrame({ @@ -1158,11 +1158,11 @@ def test_str_arrow_table(self): buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) self.assertEqual( buf, - 'a b=1i\n' + + ('a b=1i\n' + ('b' * 127) + ' b=2i\n' + 'q❤️p b=3i\n' + '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n') + '💩🦞 b=5i\n').encode("utf-8")) with self.assertRaisesRegex( qi.IngressError, "Too long"): @@ -1210,7 +1210,7 @@ def test_str_arrow_symbol(self): buf = _dataframe(df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1,a=a b=1i\n' + + ('tbl1,a=a b=1i\n' + 'tbl1,a=q❤️p b=2i\n' + 'tbl1,a=' + ('❤️' * 1200) + ' b=3i\n' + 'tbl1,a=Questo\\ è\\ un\\ qualcosa b=4i\n' + @@ -1218,7 +1218,7 @@ def test_str_arrow_symbol(self): 'tbl1,a= b=6i\n' + 'tbl1 b=7i\n' + 'tbl1,a=嚜꓂ b=8i\n' + - 'tbl1,a=💩🦞 b=9i\n') + 'tbl1,a=💩🦞 b=9i\n').encode('utf-8')) def test_str_arrow_col(self): df = pd.DataFrame({ @@ -1237,7 +1237,7 @@ def test_str_arrow_col(self): buf = _dataframe(df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 a="a",b=1i\n' + + ('tbl1 a="a",b=1i\n' + 'tbl1 a="q❤️p",b=2i\n' + 'tbl1 a="' + ('❤️' * 1200) + '",b=3i\n' + 'tbl1 a="Questo è un qualcosa",b=4i\n' + @@ -1245,7 +1245,7 @@ def test_str_arrow_col(self): 'tbl1 a="",b=6i\n' + 'tbl1 b=7i\n' + 'tbl1 a="嚜꓂",b=8i\n' + - 'tbl1 a="💩🦞",b=9i\n') + 'tbl1 a="💩🦞",b=9i\n').encode('utf-8')) def test_pyobj_int_col(self): int64_min = -2**63 @@ -1260,7 +1260,7 @@ def test_pyobj_int_col(self): int64_max], 
dtype='object'), 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), table_name='tbl1', at = qi.ServerTimestamp), - 'tbl1 a=1i,b=1i\n' + + ('tbl1 a=1i,b=1i\n' + 'tbl1 a=2i,b=2i\n' + 'tbl1 a=3i,b=3i\n' + 'tbl1 b=4i\n' + @@ -1269,7 +1269,7 @@ def test_pyobj_int_col(self): 'tbl1 a=7i,b=7i\n' + 'tbl1 a=0i,b=8i\n' + 'tbl1 a=' + str(int64_min) + 'i,b=9i\n' + - 'tbl1 a=' + str(int64_max) + 'i,b=10i\n') + 'tbl1 a=' + str(int64_max) + 'i,b=10i\n').encode('utf-8')) with self.assertRaisesRegex( qi.IngressError, "1 \\('STRING'\\): .*type int, got.*str\\."): @@ -1298,13 +1298,13 @@ def test_pyobj_float_col(self): dtype='object'), 'b': [1, 2, 3, 4, 5, 6, 7]}), table_name='tbl1', at = qi.ServerTimestamp), - 'tbl1 a=1.0,b=1i\n' + - 'tbl1 a=2.0,b=2i\n' + - 'tbl1 a=3.0,b=3i\n' + - 'tbl1 b=4i\n' + - 'tbl1 a=NaN,b=5i\n' + - 'tbl1 b=6i\n' + - 'tbl1 a=7.0,b=7i\n') + b'tbl1 a=1.0,b=1i\n' + + b'tbl1 a=2.0,b=2i\n' + + b'tbl1 a=3.0,b=3i\n' + + b'tbl1 b=4i\n' + + b'tbl1 a=NaN,b=5i\n' + + b'tbl1 b=6i\n' + + b'tbl1 a=7.0,b=7i\n') with self.assertRaisesRegex( qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): @@ -1335,7 +1335,7 @@ def _test_cat_table(self, count): exp = ''.join( f'{s} b={i}i\n' for i, s in enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1369,7 +1369,7 @@ def _test_cat_symbol(self, count): exp = ''.join( f'tbl1,a={s} b={i}i\n' for i, s in enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1378,7 +1378,7 @@ def _test_cat_symbol(self, count): exp2 = exp.replace('tbl1,a=s2 b=2i\n', 'tbl1 b=2i\n') buf2 = _dataframe(df2, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2) + self.assertEqual(buf2, exp2.encode("utf-8")) def test_cat_i8_symbol(self): self._test_cat_symbol(30) @@ -1404,7 +1404,7 @@ def _test_cat_str(self, count): exp = ''.join( f'tbl1 a="{s}",b={i}i\n' for i, s in 
enumerate(slist)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) slist[2] = None df2 = pd.DataFrame({ @@ -1413,7 +1413,7 @@ def _test_cat_str(self, count): exp2 = exp.replace('tbl1 a="s2",b=2i\n', 'tbl1 b=2i\n') buf2 = _dataframe(df2, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2) + self.assertEqual(buf2, exp2.encode("utf-8")) def test_cat_i8_str(self): self._test_cat_str(30) @@ -1435,9 +1435,9 @@ def test_all_nulls_pyobj_col(self): buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) self.assertEqual( buf, - 'tbl1 b=1i\n' + - 'tbl1 b=2i\n' + - 'tbl1 b=3i\n') + b'tbl1 b=1i\n' + + b'tbl1 b=2i\n' + + b'tbl1 b=3i\n') def test_strided_numpy_column(self): two_d = np.array([ @@ -1472,7 +1472,7 @@ def test_serializing_in_chunks(self): exp = ''.join( f'tbl1 a={i}i,b={i}i\n' for i in range(index * 10, (index + 1) * 10)) - self.assertEqual(buf, exp) + self.assertEqual(buf, exp.encode("utf-8")) def test_arrow_chunked_array(self): # We build a table with chunked arrow arrays as columns. 
@@ -1495,15 +1495,15 @@ def test_arrow_chunked_array(self): df = arr_tab.to_pandas() buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) exp = ( - 'tbl1 a=1i,b=10i\n' + - 'tbl1 a=2i,b=20i\n' + - 'tbl1 a=3i,b=30i\n' + - 'tbl1 a=4i,b=40i\n' + - 'tbl1 a=5i,b=50i\n' + - 'tbl1 a=6i,b=60i\n' + - 'tbl1 a=7i,b=70i\n' + - 'tbl1 a=8i,b=80i\n' + - 'tbl1 a=9i,b=90i\n') + b'tbl1 a=1i,b=10i\n' + + b'tbl1 a=2i,b=20i\n' + + b'tbl1 a=3i,b=30i\n' + + b'tbl1 a=4i,b=40i\n' + + b'tbl1 a=5i,b=50i\n' + + b'tbl1 a=6i,b=60i\n' + + b'tbl1 a=7i,b=70i\n' + + b'tbl1 a=8i,b=80i\n' + + b'tbl1 a=9i,b=90i\n') self.assertEqual(buf, exp) if not hasattr(pd, 'ArrowDtype'): @@ -1568,18 +1568,18 @@ def df_eq(exp_df, deser_df, exp_dtypes): df_eq(df, fp2fp_df, exp_dtypes) exp = ( - 'tbl1,s=a a=1i,b=10i,c=0.5\n' + - 'tbl1,s=b a=2i,b=20i,c=NaN\n' + - 'tbl1,s=a a=3i,b=30i,c=2.5\n' + - 'tbl1,s=c a=4i,c=3.5\n' + - 'tbl1,s=a a=5i,b=50i,c=NaN\n') + b'tbl1,s=a a=1i,b=10i,c=0.5\n' + + b'tbl1,s=b a=2i,b=20i,c=NaN\n' + + b'tbl1,s=a a=3i,b=30i,c=2.5\n' + + b'tbl1,s=c a=4i,c=3.5\n' + + b'tbl1,s=a a=5i,b=50i,c=NaN\n') fallback_exp = ( - 'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + - 'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + - 'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + - 'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + - 'tbl1 s="a",a=5i,b=50.0,c=NaN\n') + b'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + + b'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + + b'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + + b'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + + b'tbl1 s="a",a=5i,b=50.0,c=NaN\n') self.assertEqual(_dataframe(df, table_name='tbl1', at=qi.ServerTimestamp), exp) self.assertEqual(_dataframe(pa2pa_df, table_name='tbl1', at=qi.ServerTimestamp), exp) From 250135d3d5e89b9cb0dc079c79d318b1c1faa0e6 Mon Sep 17 00:00:00 2001 From: victor Date: Tue, 25 Mar 2025 17:16:33 +0800 Subject: [PATCH 02/15] comment optimize. 
--- src/questdb/ingress.pyi | 4 ++-- src/questdb/ingress.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index 9bd4a454..c6fd3e97 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -345,7 +345,7 @@ class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(buffer.peek())``. """ def peek(self) -> bytes: @@ -945,7 +945,7 @@ class Sender: """ Number of bytes of unsent data in the internal buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(sender.peek())``. """ def peek(self) -> bytes: diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 88a67279..43129717 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -825,7 +825,7 @@ cdef class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(str(sender))``. + Equivalent (but cheaper) to ``len(buffer.peek())``. """ return line_sender_buffer_size(self._impl) @@ -2297,7 +2297,7 @@ cdef class Sender: """ Number of bytes of unsent data in the internal buffer. - Equivalent (but cheaper) to ``len(sender.peek)``. + Equivalent (but cheaper) to ``len(sender.peek())``. 
""" return len(self._buffer) From 874b90b2a06687ce40c02f2039e84eab39d40a16 Mon Sep 17 00:00:00 2001 From: victor Date: Tue, 25 Mar 2025 19:19:41 +0800 Subject: [PATCH 03/15] rename peek() to __bytes__ --- ci/cibuildwheel.yaml | 2 +- src/questdb/extra_cpython.pxd | 4 --- src/questdb/ingress.pyi | 8 +++--- src/questdb/ingress.pyx | 18 ++++++-------- test/test.py | 46 +++++++++++++++++------------------ test/test_dataframe.py | 6 ++--- 6 files changed, 39 insertions(+), 45 deletions(-) diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index 6e6aef5f..6e5b156c 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -144,7 +144,7 @@ stages: inputs: {pathtoPublish: 'wheelhouse'} - job: macos_x64 - pool: {vmImage: 'macOS-12'} + pool: {vmImage: 'macOS-13'} timeoutInMinutes: 90 steps: - task: UsePythonVersion@0 diff --git a/src/questdb/extra_cpython.pxd b/src/questdb/extra_cpython.pxd index e848814d..3e794566 100644 --- a/src/questdb/extra_cpython.pxd +++ b/src/questdb/extra_cpython.pxd @@ -27,10 +27,6 @@ cdef extern from "Python.h": str PyUnicode_FromStringAndSize( const char* u, Py_ssize_t size) - # Ditto, see comment on why not returning a `PyObject` above. - object PyBytes_FromStringAndSize( - const char* u, Py_ssize_t size) - # Must be called before accessing data or is compact check. int PyUnicode_READY(PyObject* o) except -1 diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index c6fd3e97..b75b6f55 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -345,10 +345,10 @@ class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(buffer.peek())``. + Equivalent (but cheaper) to ``len(bytes(buffer))``. """ - def peek(self) -> bytes: + def __bytes__(self) -> bytes: """Return the constructed buffer as bytes. Use for debugging.""" def row( @@ -945,10 +945,10 @@ class Sender: """ Number of bytes of unsent data in the internal buffer. 
- Equivalent (but cheaper) to ``len(sender.peek())``. + Equivalent (but cheaper) to ``len(bytes(sender))``. """ - def peek(self) -> bytes: + def __bytes__(self) -> bytes: """ Inspect the contents of the internal buffer. diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 43129717..3621368c 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -78,6 +78,7 @@ from enum import Enum from typing import List, Tuple, Dict, Union, Any, Optional, Callable, \ Iterable import pathlib +from cpython.bytes cimport PyBytes_FromStringAndSize import sys import os @@ -825,20 +826,17 @@ cdef class Buffer: """ The current number of bytes currently in the buffer. - Equivalent (but cheaper) to ``len(buffer.peek())``. + Equivalent (but cheaper) to ``len(bytes(buffer))``. """ return line_sender_buffer_size(self._impl) - def peek(self) -> bytes: - """Return the constructed buffer as a string. Use for debugging.""" + def __bytes__(self) -> bytes: + """Return the constructed buffer as bytes. Use for debugging.""" return self._to_bytes() cdef inline object _to_bytes(self): cdef line_sender_buffer_view view = line_sender_buffer_peek(self._impl) - if view.len: - return PyBytes_FromStringAndSize( view.buf, view.len) - else: - return b'' + return PyBytes_FromStringAndSize( view.buf, view.len) cdef inline void_int _set_marker(self) except -1: cdef line_sender_error* err = NULL @@ -2283,7 +2281,7 @@ cdef class Sender: self.establish() return self - def peek(self) -> bytes: + def __bytes__(self) -> bytes: """ Inspect the contents of the internal buffer. @@ -2291,13 +2289,13 @@ cdef class Sender: Also see :func:`Sender.__len__`. """ - return self._buffer.peek() + return bytes(self._buffer) def __len__(self) -> int: """ Number of bytes of unsent data in the internal buffer. - Equivalent (but cheaper) to ``len(sender.peek())``. + Equivalent (but cheaper) to ``len(bytes(sender))``. 
""" return len(self._buffer) diff --git a/test/test.py b/test/test.py index b537ee67..03a2212b 100755 --- a/test/test.py +++ b/test/test.py @@ -76,7 +76,7 @@ def test_basic(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) self.assertEqual(len(buf), 25) - self.assertEqual(buf.peek(), b'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(bytes(buf), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_table(self): buf = qi.Buffer() @@ -92,7 +92,7 @@ def test_bad_table(self): def test_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': 'val2'}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), b'tbl1,sym1=val1,sym2=val2\n') + self.assertEqual(bytes(buf), b'tbl1,sym1=val1,sym2=val2\n') def test_bad_symbol_column_name(self): buf = qi.Buffer() @@ -123,36 +123,36 @@ def test_column(self): exp = ( b'tbl1 col1=t,col2=f,col3=-1i,col4=0.5,' b'col5="val",col6=12345t,col7=7200000000t\n') - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) def test_none_symbol(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1', 'sym2': None}, at=qi.ServerTimestamp) exp = b'tbl1,sym1=val1\n' - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. buf.row('tbl1', symbols={'sym1': None, 'sym2': None}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) def test_none_column(self): buf = qi.Buffer() buf.row('tbl1', columns={'col1': 1}, at=qi.ServerTimestamp) exp = b'tbl1 col1=1i\n' - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) # No fields to write, no fields written, therefore a no-op. 
buf.row('tbl1', columns={'col1': None, 'col2': None}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) self.assertEqual(len(buf), len(exp)) def test_no_symbol_or_col_args(self): buf = qi.Buffer() buf.row('table_name', at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), b'') + self.assertEqual(bytes(buf), b'') def test_unicode(self): buf = qi.Buffer() @@ -171,7 +171,7 @@ def test_unicode(self): 'questdb2': '嚜꓂', # UCS-2, 3 bytes for UTF-8. 'questdb3': '💩🦞'}, at=qi.ServerTimestamp) # UCS-4, 4 bytes for UTF-8. - self.assertEqual(buf.peek(), + self.assertEqual(bytes(buf), (f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + 'tbl1,Questo\\ è\\ il\\ nome\\ di\\ una\\ colonna=' + 'Це\\ символьне\\ значення ' + @@ -179,7 +179,7 @@ def test_unicode(self): buf.clear() buf.row('tbl1', symbols={'questdb1': 'q❤️p'}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), 'tbl1,questdb1=q❤️p\n'.encode('utf-8')) + self.assertEqual(bytes(buf), 'tbl1,questdb1=q❤️p\n'.encode('utf-8')) # A bad char in Python. with self.assertRaisesRegex( @@ -191,7 +191,7 @@ def test_unicode(self): # Ensure we can continue using the buffer after an error. buf.row('tbl1', symbols={'questdb1': 'another line of input'}, at=qi.ServerTimestamp) self.assertEqual( - buf.peek(), + bytes(buf), ('tbl1,questdb1=q❤️p\n' + # Note: No partially written failed line here. 
'tbl1,questdb1=another\\ line\\ of\\ input\n').encode('utf-8')) @@ -199,22 +199,22 @@ def test_unicode(self): def test_float(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 1.2345678901234567}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), f'tbl1 num=1.2345678901234567\n'.encode('utf-8')) + self.assertEqual(bytes(buf), f'tbl1 num=1.2345678901234567\n'.encode('utf-8')) def test_int_range(self): buf = qi.Buffer() buf.row('tbl1', columns={'num': 0}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), f'tbl1 num=0i\n'.encode('utf-8')) + self.assertEqual(bytes(buf), f'tbl1 num=0i\n'.encode('utf-8')) buf.clear() # 32-bit int range. buf.row('tbl1', columns={'min': -2 ** 31, 'max': 2 ** 31 - 1}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), f'tbl1 min=-2147483648i,max=2147483647i\n'.encode('utf-8')) + self.assertEqual(bytes(buf), f'tbl1 min=-2147483648i,max=2147483647i\n'.encode('utf-8')) buf.clear() # 64-bit int range. buf.row('tbl1', columns={'min': -2 ** 63, 'max': 2 ** 63 - 1}, at=qi.ServerTimestamp) - self.assertEqual(buf.peek(), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n'.encode('utf-8')) + self.assertEqual(bytes(buf), f'tbl1 min=-9223372036854775808i,max=9223372036854775807i\n'.encode('utf-8')) buf.clear() # Overflow. 
@@ -356,9 +356,9 @@ def test_flush_1(self): server.accept() with self.assertRaisesRegex(qi.IngressError, 'Column names'): sender.row('tbl1', symbols={'...bad name..': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(sender.peek(), b'') + self.assertEqual(bytes(sender), b'') sender.flush() - self.assertEqual(sender.peek(), b'') + self.assertEqual(bytes(sender), b'') msgs = server.recv() self.assertEqual(msgs, []) @@ -423,7 +423,7 @@ def test_two_rows_explicit_buffer(self): exp = ( 'line_sender_buffer_example2,id=Hola price="111222233333i",qty=3.5 111222233333\n' 'line_sender_example,id=Adios price="111222233343i",qty=2.5 111222233343\n') - self.assertEqual(buffer.peek(), exp.encode('utf-8')) + self.assertEqual(bytes(buffer), exp.encode('utf-8')) sender.flush(buffer) msgs = server.recv() bexp = [msg.encode('utf-8') for msg in exp.rstrip().split('\n')] @@ -433,7 +433,7 @@ def test_independent_buffer(self): buf = qi.Buffer() buf.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) exp = b'tbl1,sym1=val1\n' - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) with Server() as server1, Server() as server2: with self.builder('tcp', 'localhost', server1.port) as sender1, \ @@ -442,10 +442,10 @@ def test_independent_buffer(self): server2.accept() sender1.flush(buf, clear=False) - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) sender2.flush(buf, clear=False) - self.assertEqual(buf.peek(), exp) + self.assertEqual(bytes(buf), exp) msgs1 = server1.recv() msgs2 = server2.recv() @@ -456,7 +456,7 @@ def test_independent_buffer(self): self.assertEqual(server1.recv(), [exp[:-1]]) # The buffer is now auto-cleared. 
- self.assertEqual(buf.peek(), b'') + self.assertEqual(bytes(buf), b'') def test_auto_flush_settings_defaults(self): for protocol in ('tcp', 'tcps', 'http', 'https'): @@ -559,7 +559,7 @@ def test_dont_flush_on_exception(self): with self.builder('tcp', 'localhost', server.port) as sender: server.accept() sender.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) - self.assertEqual(sender.peek(), b'tbl1,sym1=val1\n') + self.assertEqual(bytes(sender), b'tbl1,sym1=val1\n') raise RuntimeError('Test exception') msgs = server.recv() self.assertEqual(msgs, []) diff --git a/test/test_dataframe.py b/test/test_dataframe.py index b4095046..baa5b979 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -35,7 +35,7 @@ def _dataframe(*args, **kwargs): buf = qi.Buffer() buf.dataframe(*args, **kwargs) - return buf.peek() + return bytes(buf) DF1 = pd.DataFrame({ 'A': [1.0, 2.0, 3.0], @@ -413,7 +413,7 @@ def test_u64_numpy_col(self): b'tbl2 b=1.0\n' + b'tbl2 b=1.5\n') self.assertEqual( - buf.peek(), + bytes(buf), exp1) df2 = pd.DataFrame({'a': pd.Series([ 1, 2, 3, @@ -426,7 +426,7 @@ def test_u64_numpy_col(self): buf.dataframe(df2, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( - buf.peek(), + bytes(buf), exp1) # No partial write of `df2`. def test_i64_numpy_col(self): From 9118a09d5a3ead369dd9cca1a8051a5755a3e036 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 6 Apr 2025 23:35:05 +0800 Subject: [PATCH 04/15] resolve conflict. --- c-questdb-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index 84b09b05..14fc19f3 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 84b09b0572fc2372501cc911ce46ae2073398911 +Subproject commit 14fc19f382ada9d58ce18884e7f42b10bbb36784 From 75f620da140e3a5a994cb6b0f2798e8a46bd020c Mon Sep 17 00:00:00 2001 From: victor Date: Thu, 8 May 2025 21:31:38 +0800 Subject: [PATCH 05/15] python client begin. 
--- c-questdb-client | 2 +- src/questdb/extra_cpython.pxd | 2 + src/questdb/ingress.pyi | 26 ++++++-- src/questdb/ingress.pyx | 109 +++++++++++++++++++++++++++++----- src/questdb/line_sender.pxd | 35 ++++++++++- test/test.py | 15 ++++- 6 files changed, 167 insertions(+), 22 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 14fc19f3..3ce862e7 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 14fc19f382ada9d58ce18884e7f42b10bbb36784 +Subproject commit 3ce862e74ebc56ffa966c1851c672a4c7e94d513 diff --git a/src/questdb/extra_cpython.pxd b/src/questdb/extra_cpython.pxd index 3e794566..bc339c6c 100644 --- a/src/questdb/extra_cpython.pxd +++ b/src/questdb/extra_cpython.pxd @@ -54,6 +54,8 @@ cdef extern from "Python.h": bint PyLong_CheckExact(PyObject* o) + bint PyArray_CheckExact(PyObject * o) + bint PyFloat_CheckExact(PyObject* o) double PyFloat_AS_DOUBLE(PyObject* o) diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index b75b6f55..16b6daed 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -38,6 +38,7 @@ from datetime import datetime, timedelta from enum import Enum from typing import Any, Dict, List, Optional, Union +import numpy as np import pandas as pd class IngressErrorCode(Enum): @@ -54,8 +55,17 @@ class IngressErrorCode(Enum): HttpNotSupported = ... ServerFlushError = ... ConfigError = ... + ArrayLargeDimError = ... + ArrayInternalError = ... + ArrayWriteToBufferError = ... + LineProtocolVersionError = ... BadDataFrame = ... +class LineProtocolVersion(Enum): + """Line protocol version.""" + LineProtocolVersionV1 = ... + LineProtocolVersionV2 = ... 
+ class IngressError(Exception): """An error whilst using the ``Sender`` or constructing its ``Buffer``.""" @@ -194,7 +204,7 @@ class SenderTransaction: *, symbols: Optional[Dict[str, Optional[str]]] = None, columns: Optional[ - Dict[str, Union[None, bool, int, float, str, TimestampMicros, datetime]] + Dict[str, Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] ] = None, at: Union[ServerTimestamp, TimestampNanos, datetime], ) -> SenderTransaction: @@ -300,7 +310,7 @@ class Buffer: """ - def __init__(self, init_buf_size: int = 65536, max_name_len: int = 127): + def __init__(self, init_buf_size: int = 65536, max_name_len: int = 127, line_protocol_version: LineProtocolVersion = LineProtocolVersion.LineProtocolVersionV2): """ Create a new buffer with the an initial capacity and max name length. :param int init_buf_size: Initial capacity of the buffer in bytes. @@ -357,7 +367,7 @@ class Buffer: *, symbols: Optional[Dict[str, Optional[str]]] = None, columns: Optional[ - Dict[str, Union[None, bool, int, float, str, TimestampMicros, datetime]] + Dict[str, Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] ] = None, at: Union[ServerTimestamp, TimestampNanos, datetime], ) -> Buffer: @@ -806,6 +816,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + disable_line_protocol_validation: bool = False, init_buf_size: int = 65536, max_name_len: int = 127, ): ... 
@@ -831,6 +842,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + disable_line_protocol_validation: bool = False, init_buf_size: int = 65536, max_name_len: int = 127, ) -> Sender: @@ -866,6 +878,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + disable_line_protocol_validation: bool = False, init_buf_size: int = 65536, max_name_len: int = 127, ) -> Sender: @@ -925,6 +938,11 @@ class Sender: Time interval threshold for the auto-flush logic, or None if disabled. """ + def default_line_protocol_version(self) -> LineProtocolVersion: + """ + Returns the QuestDB server's recommended default line protocol version. + """ + def establish(self): """ Prepare the sender for use. @@ -968,7 +986,7 @@ class Sender: *, symbols: Optional[Dict[str, str]] = None, columns: Optional[ - Dict[str, Union[bool, int, float, str, TimestampMicros, datetime]] + Dict[str, Union[bool, int, float, str, TimestampMicros, datetime, np.ndarray]] ] = None, at: Union[TimestampNanos, datetime, ServerTimestamp], ) -> Sender: diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 3621368c..726668bf 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -82,6 +82,8 @@ from cpython.bytes cimport PyBytes_FromStringAndSize import sys import os +cimport numpy as cnp +import numpy as np # This value is automatically updated by the `bump2version` tool. @@ -89,6 +91,7 @@ import os # .bumpversion.cfg. 
VERSION = '2.0.3' +MAX_ARRAY_DIM = 32 cdef bint _has_gil(PyThreadState** gs): return gs[0] == NULL @@ -107,7 +110,6 @@ cdef void _ensure_has_gil(PyThreadState** gs): PyEval_RestoreThread(gs[0]) gs[0] = NULL - class IngressErrorCode(Enum): """Category of Error.""" CouldNotResolveAddr = line_sender_error_could_not_resolve_addr @@ -121,12 +123,20 @@ class IngressErrorCode(Enum): HttpNotSupported = line_sender_error_http_not_supported ServerFlushError = line_sender_error_server_flush_error ConfigError = line_sender_error_config_error - BadDataFrame = line_sender_error_server_flush_error + 1 + ArrayLargeDimError = line_sender_error_array_large_dim + ArrayInternalError = line_sender_error_array_view_internal_error + ArrayWriteToBufferError = line_sender_error_array_view_write_to_buffer_error + LineProtocolVersionError = line_sender_error_line_protocol_version_error + BadDataFrame = line_sender_error_line_protocol_version_error + 1 def __str__(self) -> str: """Return the name of the enum.""" return self.name +class LineProtocolVersion(Enum): + """Line protocol version.""" + LineProtocolVersionV1 = line_protocol_version_1 + LineProtocolVersionV2 = line_protocol_version_2 class IngressError(Exception): """An error whilst using the ``Sender`` or constructing its ``Buffer``.""" @@ -163,6 +173,14 @@ cdef inline object c_err_code_to_py(line_sender_error_code code): return IngressErrorCode.ServerFlushError elif code == line_sender_error_config_error: return IngressErrorCode.ConfigError + elif code == line_sender_error_array_large_dim: + return IngressErrorCode.ArrayLargeDimError + elif code == line_sender_error_array_view_internal_error: + return IngressErrorCode.ArrayInternalError + elif code == line_sender_error_array_view_write_to_buffer_error: + return IngressErrorCode.ArrayWriteToBufferError + elif code == line_sender_error_line_protocol_version_error: + return IngressErrorCode.LineProtocolVersionError else: raise ValueError('Internal error converting error code.') @@ 
-608,7 +626,7 @@ cdef class SenderTransaction: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime]] + Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] ]=None, at: Union[ServerTimestamp, TimestampNanos, datetime]): """ @@ -689,7 +707,6 @@ cdef class SenderTransaction: self._sender._in_txn = False self._complete = True - cdef class Buffer: """ Construct QuestDB-flavored InfluxDB Line Protocol (ILP) messages. @@ -760,18 +777,22 @@ cdef class Buffer: cdef size_t _max_name_len cdef object _row_complete_sender - def __cinit__(self, init_buf_size: int=65536, max_name_len: int=127): + def __cinit__(self, init_buf_size: int=65536, max_name_len: int=127, line_protocol_version: LineProtocolVersion=LineProtocolVersion.LineProtocolVersionV2): """ Create a new buffer with the an initial capacity and max name length. :param int init_buf_size: Initial capacity of the buffer in bytes. :param int max_name_len: Maximum length of a table or column name. 
""" - self._cinit_impl(init_buf_size, max_name_len) + self._cinit_impl(init_buf_size, max_name_len, line_protocol_version.value) - cdef inline _cinit_impl(self, size_t init_buf_size, size_t max_name_len): + cdef inline _cinit_impl(self, size_t init_buf_size, size_t max_name_len, line_protocol_version version): self._impl = line_sender_buffer_with_max_name_len(max_name_len) self._b = qdb_pystr_buf_new() line_sender_buffer_reserve(self._impl, init_buf_size) + cdef line_sender_error* err = NULL + if not line_sender_buffer_set_line_protocol_version(self._impl, version, &err): + raise c_err_to_py(err) + self._init_buf_size = init_buf_size self._max_name_len = max_name_len self._row_complete_sender = None @@ -905,6 +926,20 @@ cdef class Buffer: if not line_sender_buffer_column_ts_micros(self._impl, c_name, ts._value, &err): raise c_err_to_py(err) + cdef inline void_int _column_numpy( + self, line_sender_column_name c_name, cnp.ndarray arr) except -1: + if cnp.PyArray_DTYPE(arr).kind != b'f': + raise ValueError('expect float64 array') + cdef size_t rank = cnp.PyArray_NDIM(arr) + if rank == 0: + raise ValueError('Zero-dimensional arrays are not supported') + if rank > MAX_ARRAY_DIM: + raise ValueError(f'Array dimension mismatch: expected at most {MAX_ARRAY_DIM} dimensions, but got {rank}') + cdef line_sender_error* err = NULL + if not line_sender_buffer_column_f64_arr( + self._impl, c_name, rank, cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), cnp.PyArray_BYTES(arr), cnp.PyArray_NBYTES(arr), &err): + raise c_err_to_py(err) + cdef inline void_int _column_dt( self, line_sender_column_name c_name, datetime dt) except -1: cdef line_sender_error* err = NULL @@ -925,6 +960,8 @@ cdef class Buffer: self._column_str(c_name, value) elif isinstance(value, TimestampMicros): self._column_ts(c_name, value) + elif PyArray_CheckExact( value): + self._column_numpy(c_name, value) elif isinstance(value, datetime): self._column_dt(c_name, value) else: @@ -934,7 +971,8 @@ cdef class Buffer: 
'float', 'str', 'TimestampMicros', - 'datetime.datetime')) + 'datetime.datetime' + 'np.ndarray')) raise TypeError( f'Unsupported type: {_fqn(type(value))}. Must be one of: {valid}') @@ -1016,7 +1054,7 @@ cdef class Buffer: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime]] + Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] ]=None, at: Union[ServerTimestamp, TimestampNanos, datetime]): """ @@ -1706,6 +1744,7 @@ cdef object parse_conf_str( 'auto_flush_rows': str, 'auto_flush_bytes': str, 'auto_flush_interval': str, + 'disable_line_protocol_version': str, 'init_buf_size': int, 'max_name_len': int, } @@ -1759,6 +1798,7 @@ cdef class Sender: object auto_flush_rows, object auto_flush_bytes, object auto_flush_interval, + object disable_line_protocol_validation, object init_buf_size, object max_name_len) except -1: """ @@ -1906,11 +1946,30 @@ cdef class Sender: auto_flush_interval, &self._auto_flush_mode) + if isinstance(disable_line_protocol_validation, str): + if disable_line_protocol_validation == 'off': + disable_line_protocol_validation = False + elif disable_line_protocol_validation == 'on': + disable_line_protocol_validation = True + else: + raise IngressError( + IngressErrorCode.ConfigError, + '"disable_line_protocol_validation" must be None, bool, "on" or "off", ' + + f'not {disable_line_protocol_validation!r}') + + if disable_line_protocol_validation is None: + disable_line_protocol_validation = False + elif not isinstance(disable_line_protocol_validation, bool): + raise ValueError( + '"disable_line_protocol_validation" must be None, bool, "on" or "off", ' + + f'not {disable_line_protocol_validation!r}') + + if disable_line_protocol_validation: + if not line_sender_opts_disable_line_protocol_validation(self._opts, &err): + raise c_err_to_py(err) + self._init_buf_size = init_buf_size or 65536 self._max_name_len = max_name_len or 127 - self._buffer 
= Buffer( - init_buf_size=self._init_buf_size, - max_name_len=self._max_name_len) self._last_flush_ms = calloc(1, sizeof(int64_t)) def __cinit__(self): @@ -1948,6 +2007,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds + object disable_line_protocol_validation=None, # Default off object init_buf_size=None, # 64KiB object max_name_len=None): # 127 @@ -1991,6 +2051,7 @@ cdef class Sender: auto_flush_rows, auto_flush_bytes, auto_flush_interval, + disable_line_protocol_validation, init_buf_size, max_name_len) finally: @@ -2018,6 +2079,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds + object disable_line_protocol_validation=None, # Default off object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2072,6 +2134,7 @@ cdef class Sender: 'auto_flush_rows': auto_flush_rows, 'auto_flush_bytes': auto_flush_bytes, 'auto_flush_interval': auto_flush_interval, + 'disable_line_protocol_validation': disable_line_protocol_validation, 'init_buf_size': init_buf_size, 'max_name_len': max_name_len, }.items(): @@ -2112,6 +2175,7 @@ cdef class Sender: params.get('auto_flush_rows'), params.get('auto_flush_bytes'), params.get('auto_flush_interval'), + params.get('disable_line_protocol_validation'), params.get('init_buf_size'), params.get('max_name_len')) @@ -2140,6 +2204,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds + object disable_line_protocol_validation=None, # Default off object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2179,6 +2244,7 @@ cdef class Sender: auto_flush_rows=auto_flush_rows, 
auto_flush_bytes=auto_flush_bytes, auto_flush_interval=auto_flush_interval, + disable_line_protocol_validation=disable_line_protocol_validation, init_buf_size=init_buf_size, max_name_len=max_name_len) @@ -2192,7 +2258,8 @@ cdef class Sender: """ return Buffer( init_buf_size=self._init_buf_size, - max_name_len=self._max_name_len) + max_name_len=self._max_name_len, + line_protocol_version=self.default_line_protocol_version()) @property def init_buf_size(self) -> int: @@ -2247,6 +2314,13 @@ cdef class Sender: return None return timedelta(milliseconds=self._auto_flush_mode.interval) + def default_line_protocol_version(self) -> LineProtocolVersion: + if self._impl == NULL: + raise IngressError( + IngressErrorCode.InvalidApiCall, + 'default_line_protocol_version() can\'t be called: Not connected.') + return LineProtocolVersion(line_sender_default_line_protocol_version(self._impl)) + def establish(self): """ Prepare the sender for use. @@ -2267,6 +2341,13 @@ cdef class Sender: self._impl = line_sender_build(self._opts, &err) if self._impl == NULL: raise c_err_to_py(err) + + if self._buffer is None: + self._buffer = Buffer( + init_buf_size=self._init_buf_size, + max_name_len=self._max_name_len, + line_protocol_version=self.default_line_protocol_version()) + line_sender_opts_free(self._opts) self._opts = NULL @@ -2311,7 +2392,7 @@ cdef class Sender: symbols: Optional[Dict[str, str]]=None, columns: Optional[Dict[ str, - Union[bool, int, float, str, TimestampMicros, datetime]]]=None, + Union[bool, int, float, str, TimestampMicros, datetime, np.ndarray]]]=None, at: Union[TimestampNanos, datetime, ServerTimestamp]): """ Write a row to the internal buffer. 
diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index d5759fa3..43f17033 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -22,7 +22,7 @@ ## ################################################################################ -from libc.stdint cimport int64_t, uint16_t, uint64_t, uint8_t +from libc.stdint cimport int64_t, uint16_t, uint64_t, uint8_t, uint32_t, int32_t cdef extern from "questdb/ingress/line_sender.h": cdef struct line_sender_error: @@ -40,6 +40,10 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error_http_not_supported, line_sender_error_server_flush_error, line_sender_error_config_error, + line_sender_error_array_large_dim + line_sender_error_array_view_internal_error + line_sender_error_array_view_write_to_buffer_error + line_sender_error_line_protocol_version_error cdef enum line_sender_protocol: line_sender_protocol_tcp, @@ -47,6 +51,10 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_protocol_http, line_sender_protocol_https, + cdef enum line_protocol_version: + line_protocol_version_1 = 1, + line_protocol_version_2 = 2, + cdef enum line_sender_ca: line_sender_ca_webpki_roots, line_sender_ca_os_roots, @@ -128,6 +136,12 @@ cdef extern from "questdb/ingress/line_sender.h": size_t max_name_len ) noexcept nogil + bint line_sender_buffer_set_line_protocol_version( + line_sender_buffer* buffer, + line_protocol_version version, + line_sender_error** err_out + ) noexcept nogil + void line_sender_buffer_free( line_sender_buffer* buffer ) noexcept nogil @@ -220,6 +234,17 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil + bint line_sender_buffer_column_f64_arr( + line_sender_buffer* buffer, + line_sender_column_name name, + size_t rank, + const size_t* shapes, + const ssize_t* strides, + const uint8_t* data_buffer, + size_t data_buffer_len, + line_sender_error** err_out + ) noexcept nogil + bint 
line_sender_buffer_column_ts_nanos( line_sender_buffer* buffer, line_sender_column_name name, @@ -314,6 +339,11 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil + bint line_sender_opts_disable_line_protocol_validation( + line_sender_opts* opts, + line_sender_error** err_out + ) noexcept nogil + bint line_sender_opts_auth_timeout( line_sender_opts* opts, uint64_t millis, @@ -384,6 +414,9 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil + line_protocol_version line_sender_default_line_protocol_version( + const line_sender * sender); + bint line_sender_must_close( const line_sender* sender ) noexcept nogil diff --git a/test/test.py b/test/test.py index 03a2212b..1d295b60 100755 --- a/test/test.py +++ b/test/test.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import struct import sys sys.dont_write_bytecode = True @@ -41,6 +41,12 @@ def test_no_pandas(self): buf.dataframe(None, at=qi.ServerTimestamp) +def _float_binary_bytes(value: float, text_format: bool = False) -> bytes: + if text_format: + return f"={value}".encode('utf-8') + else: + return b'==' + struct.pack(' Date: Fri, 9 May 2025 08:30:37 +0800 Subject: [PATCH 06/15] python f64 array interface. 
--- c-questdb-client | 2 +- setup.py | 4 +++- src/questdb/ingress.pyx | 24 ++++++++++++++++++------ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 3ce862e7..d76b272c 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 3ce862e74ebc56ffa966c1851c672a4c7e94d513 +Subproject commit d76b272c547d85cbe90002c56a445071b0ee12c1 diff --git a/setup.py b/setup.py index 6f453d20..e3ba41fe 100755 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ import os import shutil import platform +import numpy as np from setuptools import setup, find_packages from setuptools.extension import Extension @@ -83,7 +84,8 @@ def ingress_extension(): ["src/questdb/ingress.pyx"], include_dirs=[ "c-questdb-client/include", - "pystr-to-utf8/include"], + "pystr-to-utf8/include", + np.get_include()], library_dirs=lib_paths, libraries=libraries, extra_compile_args=extra_compile_args, diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 726668bf..be2d5211 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -85,6 +85,11 @@ import os cimport numpy as cnp import numpy as np +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_Descr: + int type_num + enum: NPY_FLOAT64 +cnp.import_array() # This value is automatically updated by the `bump2version` tool. 
# If you need to update it, also update the search definition in @@ -928,16 +933,23 @@ cdef class Buffer: cdef inline void_int _column_numpy( self, line_sender_column_name c_name, cnp.ndarray arr) except -1: - if cnp.PyArray_DTYPE(arr).kind != b'f': - raise ValueError('expect float64 array') - cdef size_t rank = cnp.PyArray_NDIM(arr) + cdef PyArray_Descr * dtype_ptr = cnp.PyArray_DESCR(arr) + if dtype_ptr.type_num != NPY_FLOAT64: + raise ValueError('Expected float64 array, got: %s' % str(arr.dtype)) + cdef: + size_t rank = cnp.PyArray_NDIM(arr) + const uint8_t * data_ptr + line_sender_error * err = NULL + if rank == 0: raise ValueError('Zero-dimensional arrays are not supported') if rank > MAX_ARRAY_DIM: - raise ValueError(f'Array dimension mismatch: expected at most {MAX_ARRAY_DIM} dimensions, but got {rank}') - cdef line_sender_error* err = NULL + raise ValueError(f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') + data_ptr = cnp.PyArray_DATA(arr) + if not line_sender_buffer_column_f64_arr( - self._impl, c_name, rank, cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), cnp.PyArray_BYTES(arr), cnp.PyArray_NBYTES(arr), &err): + self._impl, c_name, rank, cnp.PyArray_DIMS(arr), + cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): raise c_err_to_py(err) cdef inline void_int _column_dt( From a4c02f14cf0bfb8eec3a2f11b01350bde87c5e68 Mon Sep 17 00:00:00 2001 From: victor Date: Fri, 9 May 2025 16:39:45 +0800 Subject: [PATCH 07/15] add python tests --- c-questdb-client | 2 +- test/mock_server.py | 34 ++++++++++++++++- test/test.py | 89 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index d76b272c..8d739f1c 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit d76b272c547d85cbe90002c56a445071b0ee12c1 +Subproject commit 8d739f1cf895bcf44b22db95eb0f5b50a2eebeb4 diff --git a/test/mock_server.py b/test/mock_server.py index 
281b1742..9f0d2e3c 100644 --- a/test/mock_server.py +++ b/test/mock_server.py @@ -1,3 +1,4 @@ +import json import socket import select import re @@ -59,12 +60,16 @@ def close(self): def __exit__(self, _ex_type, _ex_value, _ex_tb): self.close() +SETTINGS_WITH_PROTOCOL_VERSION = b'{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "line.proto.default.version": 2, "line.proto.support.versions": [1, 2], "ilp.proto.transports": [ "tcp", "http" ], "posthog.enabled": false, "posthog.api.key": null }' +SETTINGS_WITHOUT_PROTOCOL_VERSION = b'{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "posthog.enabled": false, "posthog.api.key": null }' + class HttpServer: - def __init__(self, delay_seconds=0): + def __init__(self, delay_seconds=0, settings=SETTINGS_WITH_PROTOCOL_VERSION): self.delay_seconds = delay_seconds self.requests = [] self.responses = [] self.headers = [] + self.settings = settings.decode('utf-8') self._ready_event = None self._stop_event = None self._http_server = None @@ -79,8 +84,33 @@ def create_handler(self): requests = self.requests headers = self.headers responses = self.responses + server_settings = self.settings.encode('utf-8') class IlpHttpHandler(hs.BaseHTTPRequestHandler): + def do_GET(self): + try: + time.sleep(delay_seconds) + headers.append(dict(self.headers.items())) + content_length = self.headers.get('Content-Length', 0) + if content_length: + self.rfile.read(int(content_length)) + + if len(server_settings) == 0: + self.send_error(404, "Endpoint not found") + else: + if self.path == '/settings': + response_data = server_settings + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', len(response_data)) + self.end_headers() + self.wfile.write(response_data) + self.wfile.flush() + else: + self.send_error(404, "Endpoint not found") + except BrokenPipeError: + pass + def do_POST(self): time.sleep(delay_seconds) @@ -110,7 
+140,7 @@ def do_POST(self): def __enter__(self): self._stop_event = threading.Event() handler_class = self.create_handler() - self._http_server = hs.HTTPServer(('', 0), handler_class, bind_and_activate=True) + self._http_server = hs.HTTPServer(('', 1111), handler_class, bind_and_activate=True) self._http_server_thread = threading.Thread(target=self._serve) self._http_server_thread.start() return self diff --git a/test/test.py b/test/test.py index 1d295b60..894faf5d 100755 --- a/test/test.py +++ b/test/test.py @@ -10,6 +10,7 @@ from enum import Enum import random import pathlib +import numpy as np import patch_path @@ -47,6 +48,41 @@ def _float_binary_bytes(value: float, text_format: bool = False) -> bytes: else: return b'==' + struct.pack(' bytes: + header = b'=' + format_type = struct.pack(' Date: Fri, 9 May 2025 17:01:16 +0800 Subject: [PATCH 08/15] fix http port --- test/mock_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mock_server.py b/test/mock_server.py index 9f0d2e3c..3dace5b7 100644 --- a/test/mock_server.py +++ b/test/mock_server.py @@ -140,7 +140,7 @@ def do_POST(self): def __enter__(self): self._stop_event = threading.Event() handler_class = self.create_handler() - self._http_server = hs.HTTPServer(('', 1111), handler_class, bind_and_activate=True) + self._http_server = hs.HTTPServer(('', 0), handler_class, bind_and_activate=True) self._http_server_thread = threading.Thread(target=self._serve) self._http_server_thread.start() return self From cc806dd07f3667f19cb1363da17bca2ec675c35d Mon Sep 17 00:00:00 2001 From: victor Date: Sat, 10 May 2025 00:12:32 +0800 Subject: [PATCH 09/15] fix some python tests. 
--- src/questdb/ingress.pyx | 14 ++++++++++++++ test/test.py | 21 ++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index be2d5211..8af5557e 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -1982,6 +1982,14 @@ cdef class Sender: self._init_buf_size = init_buf_size or 65536 self._max_name_len = max_name_len or 127 + + # self._buffer will be constructed after establish connection. + if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: + self._buffer = Buffer( + init_buf_size=self._init_buf_size, + max_name_len=self._max_name_len, + line_protocol_version=LineProtocolVersion.LineProtocolVersionV2) + self._last_flush_ms = calloc(1, sizeof(int64_t)) def __cinit__(self): @@ -2327,6 +2335,9 @@ cdef class Sender: return timedelta(milliseconds=self._auto_flush_mode.interval) def default_line_protocol_version(self) -> LineProtocolVersion: + if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: + return LineProtocolVersion.LineProtocolVersionV2 + if self._impl == NULL: raise IngressError( IngressErrorCode.InvalidApiCall, @@ -2346,11 +2357,14 @@ cdef class Sender: method will return only *after* the handshake(s) is/are complete. 
""" cdef line_sender_error* err = NULL + cdef PyThreadState * gs = NULL if self._opts == NULL: raise IngressError( IngressErrorCode.InvalidApiCall, 'establish() can\'t be called after close().') + _ensure_doesnt_have_gil(&gs) self._impl = line_sender_build(self._opts, &err) + _ensure_has_gil(&gs) if self._impl == NULL: raise c_err_to_py(err) diff --git a/test/test.py b/test/test.py index 894faf5d..cb425a52 100755 --- a/test/test.py +++ b/test/test.py @@ -331,7 +331,7 @@ class TestBases: class TestSender(unittest.TestCase): def test_transaction_row_at_disallows_none(self): - with Server() as server, self.builder('http', 'localhost', server.port) as sender: + with Server() as server, self.builder('http', 'localhost', server.port, disable_line_protocol_validation=True) as sender: with self.assertRaisesRegex( qi.IngressError, 'must be of type TimestampNanos, datetime, or ServerTimestamp'): @@ -511,12 +511,12 @@ def test_two_rows_explicit_buffer(self): columns={'price': '111222233343i', 'qty': 2.5}, at=qi.TimestampNanos(111222233343)) exp = ( - 'line_sender_buffer_example2,id=Hola price="111222233333i",qty=3.5 111222233333\n' - 'line_sender_example,id=Adios price="111222233343i",qty=2.5 111222233343\n') - self.assertEqual(bytes(buffer), exp.encode('utf-8')) + b'line_sender_buffer_example2,id=Hola price="111222233333i",qty' + _float_binary_bytes(3.5) + b' 111222233333\n' + b'line_sender_example,id=Adios price="111222233343i",qty' + _float_binary_bytes(2.5) + b' 111222233343\n') + self.assertEqual(bytes(buffer), exp) sender.flush(buffer) msgs = server.recv() - bexp = [msg.encode('utf-8') for msg in exp.rstrip().split('\n')] + bexp = [msg for msg in exp.rstrip().split(b'\n')] self.assertEqual(msgs, bexp) def test_independent_buffer(self): @@ -998,14 +998,14 @@ def test_http_username_password(self): sender.row('tbl1', columns={'x': 42}, at=qi.ServerTimestamp) self.assertEqual(len(server.requests), 1) self.assertEqual(server.requests[0], b'tbl1 x=42i\n') - 
self.assertEqual(server.headers[0]['Authorization'], 'Basic dXNlcjpwYXNz') + self.assertEqual(server.headers[1]['authorization'], 'Basic dXNlcjpwYXNz') def test_http_token(self): with HttpServer() as server, self.builder('http', 'localhost', server.port, token='Yogi') as sender: sender.row('tbl1', columns={'x': 42}, at=qi.ServerTimestamp) self.assertEqual(len(server.requests), 1) self.assertEqual(server.requests[0], b'tbl1 x=42i\n') - self.assertEqual(server.headers[0]['Authorization'], 'Bearer Yogi') + self.assertEqual(server.headers[1]['authorization'], 'Bearer Yogi') def test_max_buf_size(self): with HttpServer() as server, self.builder('http', 'localhost', server.port, max_buf_size=1024, @@ -1048,6 +1048,7 @@ def test_http_request_min_throughput(self): 'localhost', server.port, request_timeout=1000, + disable_line_protocol_validation=True, # request_timeout is sufficiently high since it's also used as a connect timeout and we want to # survive hiccups on CI. it should be lower than the server delay though to actually test the # effect of request_min_throughput. 
@@ -1064,6 +1065,7 @@ def test_http_request_min_throughput_timeout(self): auto_flush='off', request_timeout=1, retry_timeout=0, + disable_line_protocol_validation=True, request_min_throughput=100000000) as sender: sender.row('tbl1', columns={'x': 42}, at=qi.ServerTimestamp) sender.row('tbl1', columns={'x': 42}, at=qi.ServerTimestamp) @@ -1074,7 +1076,7 @@ def test_http_request_min_throughput_timeout(self): # wait 5ms in the server to simulate a slow response server.responses.append((5, 200, 'text/plain', b'OK')) - with self.assertRaisesRegex(qi.IngressError, 'timed out reading response'): + with self.assertRaisesRegex(qi.IngressError, 'timeout: per call'): sender.flush() def test_http_request_timeout(self): @@ -1084,11 +1086,12 @@ def test_http_request_timeout(self): server.port, retry_timeout=0, request_min_throughput=0, # disable + disable_line_protocol_validation=True, request_timeout=datetime.timedelta(milliseconds=5)) as sender: # wait for 10ms in the server to simulate a slow response server.responses.append((20, 200, 'text/plain', b'OK')) sender.row('tbl1', columns={'x': 42}, at=qi.ServerTimestamp) - with self.assertRaisesRegex(qi.IngressError, 'timed out reading response'): + with self.assertRaisesRegex(qi.IngressError, 'timeout: per call'): sender.flush() class Timestamp(unittest.TestCase): From b94ee75f3855d1c731afa9c117e7a69deb8a2a59 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 12 May 2025 16:19:54 +0800 Subject: [PATCH 10/15] add line protocol version tests, array tests, and dataframe support ndarray ingress. 
todo: dataframe support ndarray tests --- src/questdb/dataframe.pxi | 52 +- src/questdb/ingress.pyx | 83 +- test/common_tools.py | 53 + test/mock_server.py | 60 +- test/test.py | 265 +++- test/test_dataframe.py | 2941 +++++++++++++++++++------------------ 6 files changed, 1891 insertions(+), 1563 deletions(-) create mode 100644 test/common_tools.py diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index 7601587b..36bb9480 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -73,7 +73,8 @@ cdef enum col_target_t: col_target_column_f64 = 5 col_target_column_str = 6 col_target_column_ts = 7 - col_target_at = 8 + col_target_column_array = 8 + col_target_at = 9 cdef dict _TARGET_NAMES = { @@ -85,6 +86,7 @@ cdef dict _TARGET_NAMES = { col_target_t.col_target_column_f64: "float", col_target_t.col_target_column_str: "string", col_target_t.col_target_column_ts: "timestamp", + col_target_t.col_target_column_array: "array", col_target_t.col_target_at: "designated timestamp", } @@ -125,6 +127,7 @@ cdef enum col_source_t: col_source_str_lrg_utf8_arrow = 406000 col_source_dt64ns_numpy = 501000 col_source_dt64ns_tz_arrow = 502000 + col_source_array_numpy = 503000 cdef bint col_source_needs_gil(col_source_t source) noexcept nogil: @@ -213,6 +216,9 @@ cdef dict _TARGET_TO_SOURCES = { col_source_t.col_source_dt64ns_numpy, col_source_t.col_source_dt64ns_tz_arrow, }, + col_target_t.col_target_column_array: { + col_source_t.col_source_array_numpy, + }, col_target_t.col_target_at: { col_source_t.col_source_dt64ns_numpy, col_source_t.col_source_dt64ns_tz_arrow, @@ -227,7 +233,8 @@ cdef tuple _FIELD_TARGETS = ( col_target_t.col_target_column_i64, col_target_t.col_target_column_f64, col_target_t.col_target_column_str, - col_target_t.col_target_column_ts) + col_target_t.col_target_column_ts, + col_target_t.col_target_column_array) # Targets that map directly from a meta target. 
@@ -349,6 +356,9 @@ cdef enum col_dispatch_code_t: col_dispatch_code_at__dt64ns_tz_arrow = \ col_target_t.col_target_at + col_source_t.col_source_dt64ns_tz_arrow + col_dispatch_code_column_array__array_numpy = \ + col_target_t.col_target_column_array + col_source_t.col_source_array_numpy + # Int values in order for sorting (as needed for API's sequential coupling). cdef enum meta_target_t: @@ -452,6 +462,7 @@ cdef object _NUMPY_FLOAT32 = None cdef object _NUMPY_FLOAT64 = None cdef object _NUMPY_DATETIME64_NS = None cdef object _NUMPY_OBJECT = None +cdef object _NUMPY_ARRAY = None cdef object _PANDAS = None # module object cdef object _PANDAS_NA = None # pandas.NA cdef object _PYARROW = None # module object, if available or None @@ -484,6 +495,7 @@ cdef object _dataframe_may_import_deps(): global _NUMPY_FLOAT32 global _NUMPY_FLOAT64 global _NUMPY_DATETIME64_NS + global _NUMPY_ARRAY global _NUMPY_OBJECT if _NUMPY is not None: return @@ -510,6 +522,7 @@ cdef object _dataframe_may_import_deps(): _NUMPY_FLOAT32 = type(_NUMPY.dtype('float32')) _NUMPY_FLOAT64 = type(_NUMPY.dtype('float64')) _NUMPY_DATETIME64_NS = type(_NUMPY.dtype('datetime64[ns]')) + __NUMPY_ARRAY = _NUMPY.ndarray _NUMPY_OBJECT = type(_NUMPY.dtype('object')) _PANDAS = pandas _PANDAS_NA = pandas.NA @@ -1052,6 +1065,9 @@ cdef void_int _dataframe_resolve_source_and_buffers( _dataframe_is_supported_datetime(dtype)): col.setup.source = col_source_t.col_source_dt64ns_tz_arrow _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _NUMPY_ARRAY): + col.setup.source = col_source_t.col_source_array_numpy + _dataframe_series_as_pybuf(pandas_col, col) elif isinstance(dtype, _NUMPY_OBJECT): _dataframe_series_sniff_pyobj(pandas_col, col) else: @@ -2016,6 +2032,36 @@ cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_numpy( _ensure_has_gil(gs) raise c_err_to_py(err) +cimport numpy as cnp +cnp.import_array() + +cdef void_int _dataframe_serialize_cell_column_array__array_numpy( + line_sender_buffer* 
ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + + cdef cnp.ndarray arr = col.cursor.chunk.buffers[1] + cdef PyArray_Descr* dtype_ptr = cnp.PyArray_DESCR(arr) + if dtype_ptr.type_num != NPY_FLOAT64: + raise IngressError(IngressErrorCode.ArrayWriteToBufferError, + 'Only support float64 array, got: %s' % str(arr.dtype)) + cdef: + size_t rank = cnp.PyArray_NDIM(arr) + const uint8_t * data_ptr + line_sender_error * err = NULL + + if rank == 0: + raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') + if rank > MAX_ARRAY_DIM: + raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') + data_ptr = cnp.PyArray_DATA(arr) + + if not line_sender_buffer_column_f64_arr( + ls_buf, col.name, rank, cnp.PyArray_DIMS(arr), + cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): + raise c_err_to_py(err) + cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow( line_sender_buffer* ls_buf, @@ -2173,6 +2219,8 @@ cdef void_int _dataframe_serialize_cell( _dataframe_serialize_cell_column_str__str_i32_cat(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_numpy: _dataframe_serialize_cell_column_ts__dt64ns_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_array__array_numpy: + _dataframe_serialize_cell_column_ts__dt64ns_numpy(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_tz_arrow: _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_at__dt64ns_numpy: diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 8af5557e..efb6f71b 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -935,16 +935,16 @@ cdef class Buffer: self, line_sender_column_name c_name, cnp.ndarray arr) except -1: cdef PyArray_Descr * dtype_ptr = cnp.PyArray_DESCR(arr) if 
dtype_ptr.type_num != NPY_FLOAT64: - raise ValueError('Expected float64 array, got: %s' % str(arr.dtype)) + raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Only support float64 array, got: %s' % str(arr.dtype)) cdef: size_t rank = cnp.PyArray_NDIM(arr) const uint8_t * data_ptr line_sender_error * err = NULL if rank == 0: - raise ValueError('Zero-dimensional arrays are not supported') + raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') if rank > MAX_ARRAY_DIM: - raise ValueError(f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') + raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') data_ptr = cnp.PyArray_DATA(arr) if not line_sender_buffer_column_f64_arr( @@ -1787,6 +1787,8 @@ cdef class Sender: cdef size_t _init_buf_size cdef size_t _max_name_len cdef bint _in_txn + cdef line_protocol_version _line_protocol_version + cdef bint _auto_detect_line_protocol_version cdef void_int _set_sender_fields( self, @@ -1810,7 +1812,7 @@ cdef class Sender: object auto_flush_rows, object auto_flush_bytes, object auto_flush_interval, - object disable_line_protocol_validation, + str default_line_protocol_version, object init_buf_size, object max_name_len) except -1: """ @@ -1958,37 +1960,39 @@ cdef class Sender: auto_flush_interval, &self._auto_flush_mode) - if isinstance(disable_line_protocol_validation, str): - if disable_line_protocol_validation == 'off': - disable_line_protocol_validation = False - elif disable_line_protocol_validation == 'on': - disable_line_protocol_validation = True - else: + # default line protocol version is v2 for tcp/tcps and auto-detection for http/https + if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: + self._line_protocol_version = line_protocol_version_2 + self._auto_detect_line_protocol_version = False + else: + self._auto_detect_line_protocol_version = True + + if 
default_line_protocol_version is not None: + if default_line_protocol_version == "v1": + self._line_protocol_version = line_protocol_version_1 + self._auto_detect_line_protocol_version = False + if not line_sender_opts_disable_line_protocol_validation(self._opts, &err): + raise c_err_to_py(err) + elif default_line_protocol_version == "v2": + self._line_protocol_version = line_protocol_version_2 + self._auto_detect_line_protocol_version = False + if not line_sender_opts_disable_line_protocol_validation(self._opts, &err): + raise c_err_to_py(err) + elif default_line_protocol_version != "auto": raise IngressError( IngressErrorCode.ConfigError, - '"disable_line_protocol_validation" must be None, bool, "on" or "off", ' + - f'not {disable_line_protocol_validation!r}') - - if disable_line_protocol_validation is None: - disable_line_protocol_validation = False - elif not isinstance(disable_line_protocol_validation, bool): - raise ValueError( - '"disable_line_protocol_validation" must be None, bool, "on" or "off", ' + - f'not {disable_line_protocol_validation!r}') - - if disable_line_protocol_validation: - if not line_sender_opts_disable_line_protocol_validation(self._opts, &err): - raise c_err_to_py(err) + '"default_line_protocol_version" must be None, "auto", "v1" or "v2"' + + f'not {default_line_protocol_version!r}') self._init_buf_size = init_buf_size or 65536 self._max_name_len = max_name_len or 127 - # self._buffer will be constructed after establish connection. + # self._buffer will be constructed after establish connection for http/https. 
if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: self._buffer = Buffer( init_buf_size=self._init_buf_size, max_name_len=self._max_name_len, - line_protocol_version=LineProtocolVersion.LineProtocolVersionV2) + line_protocol_version=LineProtocolVersion(self._line_protocol_version)) self._last_flush_ms = calloc(1, sizeof(int64_t)) @@ -2027,7 +2031,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object disable_line_protocol_validation=None, # Default off + object default_line_protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 @@ -2071,7 +2075,7 @@ cdef class Sender: auto_flush_rows, auto_flush_bytes, auto_flush_interval, - disable_line_protocol_validation, + default_line_protocol_version, init_buf_size, max_name_len) finally: @@ -2099,7 +2103,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object disable_line_protocol_validation=None, # Default off + object default_line_protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2154,7 +2158,7 @@ cdef class Sender: 'auto_flush_rows': auto_flush_rows, 'auto_flush_bytes': auto_flush_bytes, 'auto_flush_interval': auto_flush_interval, - 'disable_line_protocol_validation': disable_line_protocol_validation, + 'default_line_protocol_version': default_line_protocol_version, 'init_buf_size': init_buf_size, 'max_name_len': max_name_len, }.items(): @@ -2195,7 +2199,7 @@ cdef class Sender: params.get('auto_flush_rows'), params.get('auto_flush_bytes'), params.get('auto_flush_interval'), - params.get('disable_line_protocol_validation'), + params.get('default_line_protocol_version'), 
params.get('init_buf_size'), params.get('max_name_len')) @@ -2224,7 +2228,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object disable_line_protocol_validation=None, # Default off + object default_line_protocol_version=None, # Default off object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2264,7 +2268,7 @@ cdef class Sender: auto_flush_rows=auto_flush_rows, auto_flush_bytes=auto_flush_bytes, auto_flush_interval=auto_flush_interval, - disable_line_protocol_validation=disable_line_protocol_validation, + default_line_protocol_version=default_line_protocol_version, init_buf_size=init_buf_size, max_name_len=max_name_len) @@ -2335,14 +2339,13 @@ cdef class Sender: return timedelta(milliseconds=self._auto_flush_mode.interval) def default_line_protocol_version(self) -> LineProtocolVersion: - if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: - return LineProtocolVersion.LineProtocolVersionV2 - - if self._impl == NULL: - raise IngressError( - IngressErrorCode.InvalidApiCall, - 'default_line_protocol_version() can\'t be called: Not connected.') - return LineProtocolVersion(line_sender_default_line_protocol_version(self._impl)) + if self._auto_detect_line_protocol_version: + if self._impl == NULL: + raise IngressError( + IngressErrorCode.InvalidApiCall, + 'default_line_protocol_version() can\'t be called: Not connected.') + return LineProtocolVersion(line_sender_default_line_protocol_version(self._impl)) + return LineProtocolVersion(self._line_protocol_version) def establish(self): """ diff --git a/test/common_tools.py b/test/common_tools.py new file mode 100644 index 00000000..69da3ae2 --- /dev/null +++ b/test/common_tools.py @@ -0,0 +1,53 @@ + +import struct +import numpy as np + +ARRAY_TYPE_TAGS = { + np.float64: 10, +} + +import math +import struct + 
+def _float_binary_bytes(value: float, text_format: bool = False) -> bytes: + if text_format: + if math.isnan(value): + return b'=NaN' + elif math.isinf(value): + return f'={"-Infinity" if value < 0 else "Infinity"}'.encode('utf-8') + else: + return f'={value}'.encode('utf-8').replace(b'+', b'') + else: + return b'==' + struct.pack(' bytes: + header = b'=' + format_type = struct.pack(' len(buf): + break + index = new_index + continue + + if index > 0 and buf[index] == ord('\n') and buf[index - 1] != ord('\\'): + new_msgs.append(buf[head:index]) + head = index + 1 + + index += 1 + self.msgs.extend(new_msgs) return new_msgs + def _parse_binary_data(self, buf, index): + if buf[index] != ord('=') or index + 1 >= len(buf) or buf[index + 1] != ord('='): + return index + + index += 2 # skip "==" + if index >= len(buf): + return index + binary_type = buf[index] + index += 1 + + if binary_type == 16: + index += 8 + elif binary_type == 14: + # dims + if index + 1 >= len(buf): + return index + index += 1 + if index >= len(buf): + return index + dims = buf[index] + index += 1 + + total_elements = 1 + for _ in range(dims): + if index + 4 > len(buf): + return index + dim_size = struct.unpack(' bytes: - if text_format: - return f"={value}".encode('utf-8') - else: - return b'==' + struct.pack(' bytes: - header = b'=' - format_type = struct.pack('.: .*insert null .*boolean col'): - _dataframe(df2, table_name='tbl1', at=qi.ServerTimestamp) - - def test_bool_obj_col(self): - df = pd.DataFrame({'a': pd.Series([ - True, False, False, - False, True, False], - dtype='object')}) - buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1 a=t\n' + - b'tbl1 a=f\n' + - b'tbl1 a=f\n' + - b'tbl1 a=f\n' + - b'tbl1 a=t\n' + - b'tbl1 a=f\n') - - df2 = pd.DataFrame({'a': pd.Series([ - True, False, 'false'], - dtype='object')}) - with self.assertRaisesRegex( - qi.IngressError, - 'serialize .* column .a. 
.* 2 .*false.*bool'): - _dataframe(df2, table_name='tbl1', at=qi.ServerTimestamp) - - df3 = pd.DataFrame({'a': pd.Series([ - None, True, False], - dtype='object')}) - with self.assertRaisesRegex( - qi.IngressError, - 'serialize.*\\(None\\): Cannot insert null.*boolean column'): - _dataframe(df3, table_name='tbl1', at=qi.ServerTimestamp) - - def test_datetime64_numpy_col(self): - df = pd.DataFrame({ - 'a': pd.Series([ - pd.Timestamp('2019-01-01 00:00:00'), - pd.Timestamp('2019-01-01 00:00:01'), - pd.Timestamp('2019-01-01 00:00:02'), - pd.Timestamp('2019-01-01 00:00:03'), - pd.Timestamp('2019-01-01 00:00:04'), - pd.Timestamp('2019-01-01 00:00:05'), - None, - float('nan'), - pd.NA], - dtype='datetime64[ns]'), - 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) - buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1 a=1546300800000000t,b="a"\n' + - b'tbl1 a=1546300801000000t,b="b"\n' + - b'tbl1 a=1546300802000000t,b="c"\n' + - b'tbl1 a=1546300803000000t,b="d"\n' + - b'tbl1 a=1546300804000000t,b="e"\n' + - b'tbl1 a=1546300805000000t,b="f"\n' + - b'tbl1 b="g"\n' + - b'tbl1 b="h"\n' + - b'tbl1 b="i"\n') - - df = pd.DataFrame({'a': pd.Series([ - pd.Timestamp('1970-01-01 00:00:00'), - pd.Timestamp('1970-01-01 00:00:01'), - pd.Timestamp('1970-01-01 00:00:02')])}) - buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1 a=0t\n' + - b'tbl1 a=1000000t\n' + - b'tbl1 a=2000000t\n') - - def test_datetime64_tz_arrow_col(self): - df = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=0, tz=_TZ), - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=1, tz=_TZ), - None, - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=3, tz=_TZ)], - 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) - buf = _dataframe(df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) - self.assertEqual( - buf, - # Note how these are 5hr 
offset from `test_datetime64_numpy_col`. - b'tbl1,b=sym1 a=1546318800000000t\n' + - b'tbl1,b=sym2 a=1546318801000000t\n' + - b'tbl1,b=sym3\n' + - b'tbl1,b=sym4 a=1546318803000000t\n') - - # Not epoch 0. - df = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=1970, month=1, day=1, - hour=0, minute=0, second=0, tz=_TZ), - pd.Timestamp( - year=1970, month=1, day=1, - hour=0, minute=0, second=1, tz=_TZ), - pd.Timestamp( - year=1970, month=1, day=1, - hour=0, minute=0, second=2, tz=_TZ)], - 'b': ['sym1', 'sym2', 'sym3']}) - buf = _dataframe(df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) - self.assertEqual( - buf, - # Note how these are 5hr offset from `test_datetime64_numpy_col`. - b'tbl1,b=sym1 a=18000000000t\n' + - b'tbl1,b=sym2 a=18001000000t\n' + - b'tbl1,b=sym3 a=18002000000t\n') - - # Actual epoch 0. - df = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=1969, month=12, day=31, - hour=19, minute=0, second=0, tz=_TZ), - pd.Timestamp( - year=1969, month=12, day=31, - hour=19, minute=0, second=1, tz=_TZ), - pd.Timestamp( - year=1969, month=12, day=31, - hour=19, minute=0, second=2, tz=_TZ)], - 'b': ['sym1', 'sym2', 'sym3']}) - buf = _dataframe(df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1,b=sym1 a=0t\n' + - b'tbl1,b=sym2 a=1000000t\n' + - b'tbl1,b=sym3 a=2000000t\n') - - df2 = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=1900, month=1, day=1, - hour=0, minute=0, second=0, tz=_TZ)], - 'b': ['sym1']}) - buf = _dataframe(df2, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) - - # Accounting for different datatime library differences. - # Mostly, here assert that negative timestamps are allowed. 
- self.assertIn( - buf, - [b'tbl1,b=sym1 a=-2208970800000000t\n', - b'tbl1,b=sym1 a=-2208971040000000t\n']) - - def test_datetime64_numpy_at(self): - df = pd.DataFrame({ - 'a': pd.Series([ - pd.Timestamp('2019-01-01 00:00:00'), - pd.Timestamp('2019-01-01 00:00:01'), - pd.Timestamp('2019-01-01 00:00:02'), - pd.Timestamp('2019-01-01 00:00:03'), - pd.Timestamp('2019-01-01 00:00:04'), - pd.Timestamp('2019-01-01 00:00:05'), - float('nan'), - None, - pd.NaT], - dtype='datetime64[ns]'), - 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) - buf = _dataframe(df, table_name='tbl1', at='a') - self.assertEqual( - buf, - b'tbl1 b=1i 1546300800000000000\n' + - b'tbl1 b=2i 1546300801000000000\n' + - b'tbl1 b=3i 1546300802000000000\n' + - b'tbl1 b=4i 1546300803000000000\n' + - b'tbl1 b=5i 1546300804000000000\n' + - b'tbl1 b=6i 1546300805000000000\n' + - b'tbl1 b=7i\n' + - b'tbl1 b=8i\n' + - b'tbl1 b=9i\n') - - df = pd.DataFrame({ - 'a': pd.Series([ + 1.7976931348623157e308], # f64 max + dtype='float64')}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('NAN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == 
qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + + def test_u8_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 255], # u8 max + dtype=pd.UInt8Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=255i,b="f"\n') + + def test_i8_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -128, # i8 min + 0, + None, + 127], # i8 max + dtype=pd.Int8Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-128i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=127i,b="g"\n') + + def test_u16_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 65535], # u16 max + dtype=pd.UInt16Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + ('tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=0i,b="d"\n' + + 'tbl1 b="e"\n' + + 'tbl1 a=65535i,b="f"\n').encode('utf-8')) + + def test_i16_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -32768, # i16 min + 0, + None, + 32767], # i16 max + dtype=pd.Int16Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-32768i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=32767i,b="g"\n') + + def test_u32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 
3, + 0, + None, + 4294967295], # u32 max + dtype=pd.UInt32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=4294967295i,b="f"\n') + + def test_i32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -2147483648, # i32 min + 0, + None, + 2147483647], # i32 max + dtype=pd.Int32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-2147483648i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=2147483647i,b="g"\n') + + def test_u64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 9223372036854775807], # i64 max + dtype=pd.UInt64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=0i,b="d"\n' + + b'tbl1 b="e"\n' + + b'tbl1 a=9223372036854775807i,b="f"\n') + + df2 = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 9223372036854775808], # i64 max + 1 + dtype=pd.UInt64Dtype())}) + with self.assertRaisesRegex( + qi.IngressError, + '.* serialize .* column .a. 
.* 4 .*9223372036854775808.*int64.*'): + _dataframe(self.version, df2, table_name='tbl1', at=qi.ServerTimestamp) + + def test_i64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -9223372036854775808, # i64 min + 0, + None, + 9223372036854775807], # i64 max + dtype=pd.Int64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1i,b="a"\n' + + b'tbl1 a=2i,b="b"\n' + + b'tbl1 a=3i,b="c"\n' + + b'tbl1 a=-9223372036854775808i,b="d"\n' + + b'tbl1 a=0i,b="e"\n' + + b'tbl1 b="f"\n' + + b'tbl1 a=9223372036854775807i,b="g"\n') + + def test_f32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 3.4028234663852886e38, # f32 max + None], + dtype=pd.Float32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="a"\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="b"\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="c"\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="d"\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="e"\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. 
+ b'tbl1 a' + _float_binary_bytes(3.4028234663852886e38, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="h"\n' + + b'tbl1 b="i"\n') + + def test_f64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 1.7976931348623157e308, # f64 max + None], + dtype=pd.Float64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="a"\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="b"\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="c"\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="d"\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="e"\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="f"\n' + + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. 
+ b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="h"\n' + + b'tbl1 b="i"\n') + + def test_bool_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False], + dtype='bool')}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') + + def test_bool_arrow_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False, + True, True, True, + False, False, False], + dtype='boolean')}) # Note `boolean` != `bool`. + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n') + + df2 = pd.DataFrame({'a': pd.Series([ + True, False, False, + None, True, False], + dtype='boolean')}) + with self.assertRaisesRegex( + qi.IngressError, + 'Failed.*at row index 3 .*.: .*insert null .*boolean col'): + _dataframe(self.version, df2, table_name='tbl1', at=qi.ServerTimestamp) + + def test_bool_obj_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False], + dtype='object')}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=t\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=f\n' + + b'tbl1 a=t\n' + + b'tbl1 a=f\n') + + df2 = pd.DataFrame({'a': pd.Series([ + True, False, 'false'], + dtype='object')}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize .* column .a. 
.* 2 .*false.*bool'): + _dataframe(self.version, df2, table_name='tbl1', at=qi.ServerTimestamp) + + df3 = pd.DataFrame({'a': pd.Series([ + None, True, False], + dtype='object')}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize.*\\(None\\): Cannot insert null.*boolean column'): + _dataframe(self.version, df3, table_name='tbl1', at=qi.ServerTimestamp) + + def test_datetime64_numpy_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('2019-01-01 00:00:00'), + pd.Timestamp('2019-01-01 00:00:01'), + pd.Timestamp('2019-01-01 00:00:02'), + pd.Timestamp('2019-01-01 00:00:03'), + pd.Timestamp('2019-01-01 00:00:04'), + pd.Timestamp('2019-01-01 00:00:05'), + None, + float('nan'), + pd.NA], + dtype='datetime64[ns]'), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=1546300800000000t,b="a"\n' + + b'tbl1 a=1546300801000000t,b="b"\n' + + b'tbl1 a=1546300802000000t,b="c"\n' + + b'tbl1 a=1546300803000000t,b="d"\n' + + b'tbl1 a=1546300804000000t,b="e"\n' + + b'tbl1 a=1546300805000000t,b="f"\n' + + b'tbl1 b="g"\n' + + b'tbl1 b="h"\n' + + b'tbl1 b="i"\n') + + df = pd.DataFrame({'a': pd.Series([ pd.Timestamp('1970-01-01 00:00:00'), pd.Timestamp('1970-01-01 00:00:01'), - pd.Timestamp('1970-01-01 00:00:02')], - dtype='datetime64[ns]'), - 'b': [1, 2, 3]}) - buf = _dataframe(df, table_name='tbl1', at='a') - self.assertEqual( - buf, - b'tbl1 b=1i 0\n' + - b'tbl1 b=2i 1000000000\n' + - b'tbl1 b=3i 2000000000\n') - - def test_datetime64_tz_arrow_at(self): - df = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=0, tz=_TZ), - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=1, tz=_TZ), - None, - pd.Timestamp( - year=2019, month=1, day=1, - hour=0, minute=0, second=3, tz=_TZ)], - 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) - buf = _dataframe(df, table_name='tbl1', symbols=['b'], 
at='a') - self.assertEqual( - buf, - # Note how these are 5hr offset from `test_datetime64_numpy_col`. - b'tbl1,b=sym1 1546318800000000000\n' + - b'tbl1,b=sym2 1546318801000000000\n' + - b'tbl1,b=sym3\n' + - b'tbl1,b=sym4 1546318803000000000\n') - - df2 = pd.DataFrame({ - 'a': [ - pd.Timestamp( - year=1900, month=1, day=1, - hour=0, minute=0, second=0, tz=_TZ)], - 'b': ['sym1']}) - with self.assertRaisesRegex( - qi.IngressError, "Failed.*'a'.*-220897.* is neg"): - _dataframe(df2, table_name='tbl1', symbols=['b'], at='a') - - def _test_pyobjstr_table(self, dtype): - df = pd.DataFrame({ - '../bad col name/../it does not matter...': - pd.Series([ + pd.Timestamp('1970-01-01 00:00:02')])}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=0t\n' + + b'tbl1 a=1000000t\n' + + b'tbl1 a=2000000t\n') + + def test_datetime64_tz_arrow_col(self): + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + None, + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=3, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. + b'tbl1,b=sym1 a=1546318800000000t\n' + + b'tbl1,b=sym2 a=1546318801000000t\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 a=1546318803000000t\n') + + # Not epoch 0. 
+ df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=2, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3']}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. + b'tbl1,b=sym1 a=18000000000t\n' + + b'tbl1,b=sym2 a=18001000000t\n' + + b'tbl1,b=sym3 a=18002000000t\n') + + # Actual epoch 0. + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=1, tz=_TZ), + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=2, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3']}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1,b=sym1 a=0t\n' + + b'tbl1,b=sym2 a=1000000t\n' + + b'tbl1,b=sym3 a=2000000t\n') + + df2 = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1900, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ)], + 'b': ['sym1']}) + buf = _dataframe(self.version, df2, table_name='tbl1', symbols=['b'], at=qi.ServerTimestamp) + + # Accounting for different datatime library differences. + # Mostly, here assert that negative timestamps are allowed. 
+ self.assertIn( + buf, + [b'tbl1,b=sym1 a=-2208970800000000t\n', + b'tbl1,b=sym1 a=-2208971040000000t\n']) + + def test_datetime64_numpy_at(self): + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('2019-01-01 00:00:00'), + pd.Timestamp('2019-01-01 00:00:01'), + pd.Timestamp('2019-01-01 00:00:02'), + pd.Timestamp('2019-01-01 00:00:03'), + pd.Timestamp('2019-01-01 00:00:04'), + pd.Timestamp('2019-01-01 00:00:05'), + float('nan'), + None, + pd.NaT], + dtype='datetime64[ns]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(self.version, df, table_name='tbl1', at='a') + self.assertEqual( + buf, + b'tbl1 b=1i 1546300800000000000\n' + + b'tbl1 b=2i 1546300801000000000\n' + + b'tbl1 b=3i 1546300802000000000\n' + + b'tbl1 b=4i 1546300803000000000\n' + + b'tbl1 b=5i 1546300804000000000\n' + + b'tbl1 b=6i 1546300805000000000\n' + + b'tbl1 b=7i\n' + + b'tbl1 b=8i\n' + + b'tbl1 b=9i\n') + + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('1970-01-01 00:00:00'), + pd.Timestamp('1970-01-01 00:00:01'), + pd.Timestamp('1970-01-01 00:00:02')], + dtype='datetime64[ns]'), + 'b': [1, 2, 3]}) + buf = _dataframe(self.version, df, table_name='tbl1', at='a') + self.assertEqual( + buf, + b'tbl1 b=1i 0\n' + + b'tbl1 b=2i 1000000000\n' + + b'tbl1 b=3i 2000000000\n') + + def test_datetime64_tz_arrow_at(self): + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + None, + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=3, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=['b'], at='a') + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. 
+ b'tbl1,b=sym1 1546318800000000000\n' + + b'tbl1,b=sym2 1546318801000000000\n' + + b'tbl1,b=sym3\n' + + b'tbl1,b=sym4 1546318803000000000\n') + + df2 = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1900, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ)], + 'b': ['sym1']}) + with self.assertRaisesRegex( + qi.IngressError, "Failed.*'a'.*-220897.* is neg"): + _dataframe(self.version, df2, table_name='tbl1', symbols=['b'], at='a') + + def _test_pyobjstr_table(self, dtype): + df = pd.DataFrame({ + '../bad col name/../it does not matter...': + pd.Series([ + 'a', # ASCII + 'b' * 127, # Max table name length. + 'q❤️p', # Mixed ASCII and UCS-2 + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype=dtype), + 'b': [1, 2, 3, 4, 5]}) + buf = _dataframe(self.version, df, table_name_col=0, at=qi.ServerTimestamp) + self.assertEqual( + buf, + ('a b=1i\n' + + ('b' * 127) + ' b=2i\n' + + 'q❤️p b=3i\n' + + '嚜꓂ b=4i\n' + + '💩🦞 b=5i\n').encode("utf-8")) + + with self.assertRaisesRegex( + qi.IngressError, "Too long"): + _dataframe(self.version, + pd.DataFrame({'a': pd.Series(['b' * 128], dtype=dtype)}), + table_name_col='a', at=qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe(self.version, + pd.DataFrame({ + '.': pd.Series(['x', None], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.', at=qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe(self.version, + pd.DataFrame({ + '.': pd.Series(['x', float('nan')], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.', at=qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe(self.version, + pd.DataFrame({ + '.': pd.Series(['x', pd.NA], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.', at=qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, "''.*must 
have a non-zero length"): + _dataframe(self.version, + pd.DataFrame({ + '/': pd.Series([''], dtype=dtype), + 'b': [1]}), + table_name_col='/', at=qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): + _dataframe(self.version, + pd.DataFrame({ + '/': pd.Series(['tab..1'], dtype=dtype), + 'b': [1]}), + table_name_col='/', at=qi.ServerTimestamp) + + def test_obj_str_table(self): + self._test_pyobjstr_table('object') + + with self.assertRaisesRegex( + qi.IngressError, 'table name .*got an object of type int'): + _dataframe(self.version, + pd.DataFrame({ + '.': pd.Series(['x', 42], dtype='object'), + 'z': [1, 2]}), + table_name_col='.', at=qi.ServerTimestamp) + + def test_obj_string_table(self): + self._test_pyobjstr_table('string') + + self.assertEqual( + _dataframe(self.version, + pd.DataFrame({ + '.': pd.Series(['x', 42], dtype='string'), + 'z': [1, 2]}), + table_name_col='.', at=qi.ServerTimestamp), + b'x z=1i\n' + + b'42 z=2i\n') + + def _test_pyobjstr_numpy_symbol(self, dtype): + df = pd.DataFrame({'a': pd.Series([ 'a', # ASCII - 'b' * 127, # Max table name length. 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string '嚜꓂', # UCS-2, 3 bytes for UTF-8. '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
- dtype=dtype), - 'b': [1, 2, 3, 4, 5]}) - buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) - self.assertEqual( - buf, - ('a b=1i\n' + - ('b' * 127) + ' b=2i\n' + - 'q❤️p b=3i\n' + - '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n').encode("utf-8")) - - with self.assertRaisesRegex( - qi.IngressError, "Too long"): - _dataframe( - pd.DataFrame({'a': pd.Series(['b' * 128], dtype=dtype)}), - table_name_col='a', at=qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, 'Failed.*Expected a table name, got a null.*'): - _dataframe( - pd.DataFrame({ - '.': pd.Series(['x', None], dtype=dtype), - 'b': [1, 2]}), - table_name_col='.', at=qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, 'Failed.*Expected a table name, got a null.*'): - _dataframe( - pd.DataFrame({ - '.': pd.Series(['x', float('nan')], dtype=dtype), - 'b': [1, 2]}), - table_name_col='.', at=qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, 'Failed.*Expected a table name, got a null.*'): - _dataframe( - pd.DataFrame({ - '.': pd.Series(['x', pd.NA], dtype=dtype), - 'b': [1, 2]}), - table_name_col='.', at=qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, "''.*must have a non-zero length"): - _dataframe( - pd.DataFrame({ - '/': pd.Series([''], dtype=dtype), - 'b': [1]}), - table_name_col='/', at=qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): - _dataframe( - pd.DataFrame({ - '/': pd.Series(['tab..1'], dtype=dtype), - 'b': [1]}), - table_name_col='/', at=qi.ServerTimestamp) - - def test_obj_str_table(self): - self._test_pyobjstr_table('object') - - with self.assertRaisesRegex( - qi.IngressError, 'table name .*got an object of type int'): - _dataframe( - pd.DataFrame({ - '.': pd.Series(['x', 42], dtype='object'), - 'z': [1, 2]}), - table_name_col='.', at=qi.ServerTimestamp) - - def test_obj_string_table(self): - self._test_pyobjstr_table('string') - - self.assertEqual( - 
_dataframe( - pd.DataFrame({ - '.': pd.Series(['x', 42], dtype='string'), - 'z': [1, 2]}), - table_name_col='.', at=qi.ServerTimestamp), - b'x z=1i\n' + - b'42 z=2i\n') - - def _test_pyobjstr_numpy_symbol(self, dtype): - df = pd.DataFrame({'a': pd.Series([ - 'a', # ASCII - 'q❤️p', # Mixed ASCII and UCS-2 - '❤️' * 1200, # Over the 1024 buffer prealloc. - 'Questo è un qualcosa', # Non-ASCII UCS-1 - 'щось', # UCS-2, 2 bytes for UTF-8. - '', # Empty string - '嚜꓂', # UCS-2, 3 bytes for UTF-8. - '💩🦞'], # UCS-4, 4 bytes for UTF-8. - dtype=dtype)}) - buf = _dataframe(df, table_name='tbl1', symbols=True, at=qi.ServerTimestamp) - self.assertEqual( - buf, - ('tbl1,a=a\n' + - 'tbl1,a=q❤️p\n' + - 'tbl1,a=' + ('❤️' * 1200) + '\n' + - 'tbl1,a=Questo\\ è\\ un\\ qualcosa\n' + - 'tbl1,a=щось\n' + - 'tbl1,a=\n' + - 'tbl1,a=嚜꓂\n' + - 'tbl1,a=💩🦞\n').encode("utf-8")) - - for null_obj in (None, float('nan'), pd.NA): + dtype=dtype)}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=True, at=qi.ServerTimestamp) self.assertEqual( + buf, + ('tbl1,a=a\n' + + 'tbl1,a=q❤️p\n' + + 'tbl1,a=' + ('❤️' * 1200) + '\n' + + 'tbl1,a=Questo\\ è\\ un\\ qualcosa\n' + + 'tbl1,a=щось\n' + + 'tbl1,a=\n' + + 'tbl1,a=嚜꓂\n' + + 'tbl1,a=💩🦞\n').encode("utf-8")) + + for null_obj in (None, float('nan'), pd.NA): + self.assertEqual( + _dataframe( + self.version, + pd.DataFrame({ + 'x': pd.Series(['a', null_obj], dtype=dtype), + 'y': [1, 2]}), + table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), + b'tbl1,x=a y=1i\n' + + b'tbl1 y=2i\n') + + def test_obj_str_numpy_symbol(self): + self._test_pyobjstr_numpy_symbol('object') + + with self.assertRaisesRegex( + qi.IngressError, 'Expected a string, got an .* type int'): _dataframe( + self.version, pd.DataFrame({ - 'x': pd.Series(['a', null_obj], dtype=dtype), + 'x': pd.Series(['x', 42], dtype='object'), + 'y': [1, 2]}), + table_name='tbl1', symbols=[0], at=qi.ServerTimestamp) + + def test_obj_string_numpy_symbol(self): + 
self._test_pyobjstr_numpy_symbol('string') + + self.assertEqual( + _dataframe( + self.version, + pd.DataFrame({ + 'x': pd.Series(['x', 42], dtype='string'), 'y': [1, 2]}), table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - b'tbl1,x=a y=1i\n' + - b'tbl1 y=2i\n') - - def test_obj_str_numpy_symbol(self): - self._test_pyobjstr_numpy_symbol('object') - - with self.assertRaisesRegex( - qi.IngressError, 'Expected a string, got an .* type int'): - _dataframe( - pd.DataFrame({ - 'x': pd.Series(['x', 42], dtype='object'), - 'y': [1, 2]}), - table_name='tbl1', symbols=[0], at=qi.ServerTimestamp) - - def test_obj_string_numpy_symbol(self): - self._test_pyobjstr_numpy_symbol('string') - - self.assertEqual( - _dataframe( - pd.DataFrame({ - 'x': pd.Series(['x', 42], dtype='string'), - 'y': [1, 2]}), - table_name='tbl1', symbols=[0], at=qi.ServerTimestamp), - b'tbl1,x=x y=1i\n' + - b'tbl1,x=42 y=2i\n') - - def test_str_numpy_col(self): - df = pd.DataFrame({'a': pd.Series([ - 'a', # ASCII - 'q❤️p', # Mixed ASCII and UCS-2 - '❤️' * 1200, # Over the 1024 buffer prealloc. - 'Questo è un qualcosa', # Non-ASCII UCS-1 - 'щось', # UCS-2, 2 bytes for UTF-8. - '', # Empty string - '嚜꓂', # UCS-2, 3 bytes for UTF-8. - '💩🦞'], # UCS-4, 4 bytes for UTF-8. - dtype='str')}) - buf = _dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) - self.assertEqual( - buf, - ('tbl1 a="a"\n' + - 'tbl1 a="q❤️p"\n' + - 'tbl1 a="' + ('❤️' * 1200) + '"\n' + - 'tbl1 a="Questo è un qualcosa"\n' + - 'tbl1 a="щось"\n' + - 'tbl1 a=""\n' + - 'tbl1 a="嚜꓂"\n' + - 'tbl1 a="💩🦞"\n').encode("utf-8")) - - def test_str_arrow_table(self): - df = pd.DataFrame({ - '../bad col name/../it does not matter...': pd.Series([ - 'a', # ASCII - 'b' * 127, # Max table name length. - 'q❤️p', # Mixed ASCII and UCS-2 - '嚜꓂', # UCS-2, 3 bytes for UTF-8. - '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
- dtype='string[pyarrow]'), - 'b': [1, 2, 3, 4, 5]}) - buf = _dataframe(df, table_name_col=0, at=qi.ServerTimestamp) - self.assertEqual( - buf, - ('a b=1i\n' + - ('b' * 127) + ' b=2i\n' + - 'q❤️p b=3i\n' + - '嚜꓂ b=4i\n' + - '💩🦞 b=5i\n').encode("utf-8")) - - with self.assertRaisesRegex( - qi.IngressError, "Too long"): - _dataframe( - pd.DataFrame({ - 'a': pd.Series(['b' * 128], dtype='string[pyarrow]')}), - table_name_col='a', at = qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, "Failed .*.*Table name cannot be null"): - _dataframe( - pd.DataFrame({ - '.': pd.Series(['x', None], dtype='string[pyarrow]'), - 'b': [1, 2]}), - table_name_col='.', at = qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, "''.*must have a non-zero length"): - _dataframe( - pd.DataFrame({ - '/': pd.Series([''], dtype='string[pyarrow]')}), - table_name_col='/', at = qi.ServerTimestamp) - - with self.assertRaisesRegex( - qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): - _dataframe( - pd.DataFrame({ - '/': pd.Series(['tab..1'], dtype='string[pyarrow]')}), - table_name_col='/', at = qi.ServerTimestamp) - - def test_str_arrow_symbol(self): - df = pd.DataFrame({ - 'a': pd.Series([ - 'a', # ASCII - 'q❤️p', # Mixed ASCII and UCS-2 - '❤️' * 1200, # Over the 1024 buffer prealloc. - 'Questo è un qualcosa', # Non-ASCII UCS-1 - 'щось', # UCS-2, 2 bytes for UTF-8. - '', # Empty string - None, - '嚜꓂', # UCS-2, 3 bytes for UTF-8. - '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
- dtype='string[pyarrow]'), - 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) - buf = _dataframe(df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) - self.assertEqual( - buf, - ('tbl1,a=a b=1i\n' + - 'tbl1,a=q❤️p b=2i\n' + - 'tbl1,a=' + ('❤️' * 1200) + ' b=3i\n' + - 'tbl1,a=Questo\\ è\\ un\\ qualcosa b=4i\n' + - 'tbl1,a=щось b=5i\n' + - 'tbl1,a= b=6i\n' + - 'tbl1 b=7i\n' + - 'tbl1,a=嚜꓂ b=8i\n' + - 'tbl1,a=💩🦞 b=9i\n').encode('utf-8')) - - def test_str_arrow_col(self): - df = pd.DataFrame({ - 'a': pd.Series([ - 'a', # ASCII - 'q❤️p', # Mixed ASCII and UCS-2 - '❤️' * 1200, # Over the 1024 buffer prealloc. - 'Questo è un qualcosa', # Non-ASCII UCS-1 - 'щось', # UCS-2, 2 bytes for UTF-8. - '', # Empty string - None, - '嚜꓂', # UCS-2, 3 bytes for UTF-8. - '💩🦞'], # UCS-4, 4 bytes for UTF-8. - dtype='string[pyarrow]'), - 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) - buf = _dataframe(df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) - self.assertEqual( - buf, - ('tbl1 a="a",b=1i\n' + - 'tbl1 a="q❤️p",b=2i\n' + - 'tbl1 a="' + ('❤️' * 1200) + '",b=3i\n' + - 'tbl1 a="Questo è un qualcosa",b=4i\n' + - 'tbl1 a="щось",b=5i\n' + - 'tbl1 a="",b=6i\n' + - 'tbl1 b=7i\n' + - 'tbl1 a="嚜꓂",b=8i\n' + - 'tbl1 a="💩🦞",b=9i\n').encode('utf-8')) - - def test_pyobj_int_col(self): - int64_min = -2**63 - int64_max = 2**63 - 1 - self.assertEqual( - _dataframe( - pd.DataFrame({ - 'a': pd.Series([ - 1, 2, 3, None, float('nan'), pd.NA, 7, - 0, - int64_min, - int64_max], dtype='object'), - 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), - table_name='tbl1', at = qi.ServerTimestamp), - ('tbl1 a=1i,b=1i\n' + - 'tbl1 a=2i,b=2i\n' + - 'tbl1 a=3i,b=3i\n' + - 'tbl1 b=4i\n' + - 'tbl1 b=5i\n' + - 'tbl1 b=6i\n' + - 'tbl1 a=7i,b=7i\n' + - 'tbl1 a=0i,b=8i\n' + - 'tbl1 a=' + str(int64_min) + 'i,b=9i\n' + - 'tbl1 a=' + str(int64_max) + 'i,b=10i\n').encode('utf-8')) - - with self.assertRaisesRegex( - qi.IngressError, "1 \\('STRING'\\): .*type int, got.*str\\."): - _dataframe( - pd.DataFrame({ - 'a': pd.Series([1, 
'STRING'], dtype='object'), - 'b': [1, 2]}), - table_name='tbl1', at = qi.ServerTimestamp) - - out_of_range = [int64_min - 1, int64_max + 1] - for num in out_of_range: + b'tbl1,x=x y=1i\n' + + b'tbl1,x=42 y=2i\n') + + def test_str_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype='str')}) + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + ('tbl1 a="a"\n' + + 'tbl1 a="q❤️p"\n' + + 'tbl1 a="' + ('❤️' * 1200) + '"\n' + + 'tbl1 a="Questo è un qualcosa"\n' + + 'tbl1 a="щось"\n' + + 'tbl1 a=""\n' + + 'tbl1 a="嚜꓂"\n' + + 'tbl1 a="💩🦞"\n').encode("utf-8")) + + def test_str_arrow_table(self): + df = pd.DataFrame({ + '../bad col name/../it does not matter...': pd.Series([ + 'a', # ASCII + 'b' * 127, # Max table name length. + 'q❤️p', # Mixed ASCII and UCS-2 + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
+ dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5]}) + buf = _dataframe(self.version, df, table_name_col=0, at=qi.ServerTimestamp) + self.assertEqual( + buf, + ('a b=1i\n' + + ('b' * 127) + ' b=2i\n' + + 'q❤️p b=3i\n' + + '嚜꓂ b=4i\n' + + '💩🦞 b=5i\n').encode("utf-8")) + + with self.assertRaisesRegex( + qi.IngressError, "Too long"): + _dataframe( + self.version, + pd.DataFrame({ + 'a': pd.Series(['b' * 128], dtype='string[pyarrow]')}), + table_name_col='a', at = qi.ServerTimestamp) + with self.assertRaisesRegex( - qi.IngressError, "index 1 .*922337203685477.*int too big"): + qi.IngressError, "Failed .*.*Table name cannot be null"): _dataframe( + self.version, pd.DataFrame({ - 'a': pd.Series([1, num], dtype='object'), + '.': pd.Series(['x', None], dtype='string[pyarrow]'), + 'b': [1, 2]}), + table_name_col='.', at = qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, "''.*must have a non-zero length"): + _dataframe( + self.version, + pd.DataFrame({ + '/': pd.Series([''], dtype='string[pyarrow]')}), + table_name_col='/', at = qi.ServerTimestamp) + + with self.assertRaisesRegex( + qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): + _dataframe( + self.version, + pd.DataFrame({ + '/': pd.Series(['tab..1'], dtype='string[pyarrow]')}), + table_name_col='/', at = qi.ServerTimestamp) + + def test_str_arrow_symbol(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + None, + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
+ dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) + self.assertEqual( + buf, + ('tbl1,a=a b=1i\n' + + 'tbl1,a=q❤️p b=2i\n' + + 'tbl1,a=' + ('❤️' * 1200) + ' b=3i\n' + + 'tbl1,a=Questo\\ è\\ un\\ qualcosa b=4i\n' + + 'tbl1,a=щось b=5i\n' + + 'tbl1,a= b=6i\n' + + 'tbl1 b=7i\n' + + 'tbl1,a=嚜꓂ b=8i\n' + + 'tbl1,a=💩🦞 b=9i\n').encode('utf-8')) + + def test_str_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + None, + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(self.version, df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) + self.assertEqual( + buf, + ('tbl1 a="a",b=1i\n' + + 'tbl1 a="q❤️p",b=2i\n' + + 'tbl1 a="' + ('❤️' * 1200) + '",b=3i\n' + + 'tbl1 a="Questo è un qualcosa",b=4i\n' + + 'tbl1 a="щось",b=5i\n' + + 'tbl1 a="",b=6i\n' + + 'tbl1 b=7i\n' + + 'tbl1 a="嚜꓂",b=8i\n' + + 'tbl1 a="💩🦞",b=9i\n').encode('utf-8')) + + def test_pyobj_int_col(self): + int64_min = -2**63 + int64_max = 2**63 - 1 + self.assertEqual( + _dataframe( + self.version, + pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, None, float('nan'), pd.NA, 7, + 0, + int64_min, + int64_max], dtype='object'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), + table_name='tbl1', at = qi.ServerTimestamp), + ('tbl1 a=1i,b=1i\n' + + 'tbl1 a=2i,b=2i\n' + + 'tbl1 a=3i,b=3i\n' + + 'tbl1 b=4i\n' + + 'tbl1 b=5i\n' + + 'tbl1 b=6i\n' + + 'tbl1 a=7i,b=7i\n' + + 'tbl1 a=0i,b=8i\n' + + 'tbl1 a=' + str(int64_min) + 'i,b=9i\n' + + 'tbl1 a=' + str(int64_max) + 'i,b=10i\n').encode('utf-8')) + + with self.assertRaisesRegex( + qi.IngressError, "1 \\('STRING'\\): .*type int, got.*str\\."): + _dataframe( + 
self.version, + pd.DataFrame({ + 'a': pd.Series([1, 'STRING'], dtype='object'), + 'b': [1, 2]}), + table_name='tbl1', at = qi.ServerTimestamp) + + out_of_range = [int64_min - 1, int64_max + 1] + for num in out_of_range: + with self.assertRaisesRegex( + qi.IngressError, "index 1 .*922337203685477.*int too big"): + _dataframe( + self.version, + pd.DataFrame({ + 'a': pd.Series([1, num], dtype='object'), + 'b': [1, 2]}), + table_name='tbl1', at = qi.ServerTimestamp) + + def test_pyobj_float_col(self): + self.assertEqual( + _dataframe( + self.version, + pd.DataFrame({ + 'a': pd.Series( + [1.0, 2.0, 3.0, None, float('nan'), pd.NA, 7.0], + dtype='object'), + 'b': [1, 2, 3, 4, 5, 6, 7]}), + table_name='tbl1', at = qi.ServerTimestamp), + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=1i\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=2i\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=3i\n' + + b'tbl1 b=4i\n' + + b'tbl1 a' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=5i\n' + + b'tbl1 b=6i\n' + + b'tbl1 a' + _float_binary_bytes(7.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=7i\n') + + with self.assertRaisesRegex( + qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): + _dataframe( + self.version, + pd.DataFrame({ + 'a': pd.Series([1.0, 'STRING'], dtype='object'), 'b': [1, 2]}), table_name='tbl1', at = qi.ServerTimestamp) - def test_pyobj_float_col(self): - self.assertEqual( - _dataframe( - pd.DataFrame({ - 'a': pd.Series( - [1.0, 2.0, 3.0, None, float('nan'), pd.NA, 7.0], - dtype='object'), - 'b': [1, 2, 3, 4, 5, 6, 7]}), - table_name='tbl1', at = qi.ServerTimestamp), - b'tbl1 a=1.0,b=1i\n' + - b'tbl1 a=2.0,b=2i\n' + - b'tbl1 a=3.0,b=3i\n' + - b'tbl1 b=4i\n' + - b'tbl1 a=NaN,b=5i\n' + - b'tbl1 
b=6i\n' + - b'tbl1 a=7.0,b=7i\n') - - with self.assertRaisesRegex( - qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): - _dataframe( - pd.DataFrame({ - 'a': pd.Series([1.0, 'STRING'], dtype='object'), - 'b': [1, 2]}), - table_name='tbl1', at = qi.ServerTimestamp) - - def test_bad_category(self): - # We only support string categories - # (unless anyone asks for additional ones). - # We want to test others are rejected. - with self.assertRaisesRegex( - qi.IngressError, "Bad column 'a'.*got a category of .*int64"): - _dataframe( - pd.DataFrame({'a': pd.Series([1, 2, 3, 2], dtype='category')}), - table_name='tbl1', at = qi.ServerTimestamp) - - def _test_cat_table(self, count): - slist = [f's{i}' for i in range(count)] - - df = pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - - buf = _dataframe(df, table_name_col=0, at = qi.ServerTimestamp) - exp = ''.join( - f'{s} b={i}i\n' - for i, s in enumerate(slist)) - self.assertEqual(buf, exp.encode("utf-8")) - - slist[2] = None - df2 = pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - with self.assertRaisesRegex( - qi.IngressError, 'Table name cannot be null'): - _dataframe(df2, table_name_col=0, at = qi.ServerTimestamp) - - def test_cat_i8_table(self): - self._test_cat_table(30) - self._test_cat_table(127) - - def test_cat_i16_table(self): - self._test_cat_table(128) - self._test_cat_table(4000) - self._test_cat_table(32767) - - def test_cat_i32_table(self): - self._test_cat_table(32768) - self._test_cat_table(40000) - - def _test_cat_symbol(self, count): - slist = [f's{i}' for i in range(count)] - - df = pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - - buf = _dataframe(df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) - exp = ''.join( - f'tbl1,a={s} b={i}i\n' - for i, s in enumerate(slist)) - self.assertEqual(buf, exp.encode("utf-8")) - - slist[2] = None - df2 = 
pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - - exp2 = exp.replace('tbl1,a=s2 b=2i\n', 'tbl1 b=2i\n') - buf2 = _dataframe(df2, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2.encode("utf-8")) - - def test_cat_i8_symbol(self): - self._test_cat_symbol(30) - self._test_cat_symbol(127) - - def test_cat_i16_symbol(self): - self._test_cat_symbol(128) - self._test_cat_symbol(4000) - self._test_cat_symbol(32767) - - def test_cat_i32_symbol(self): - self._test_cat_symbol(32768) - self._test_cat_symbol(40000) - - def _test_cat_str(self, count): - slist = [f's{i}' for i in range(count)] - - df = pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - - buf = _dataframe(df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) - exp = ''.join( - f'tbl1 a="{s}",b={i}i\n' - for i, s in enumerate(slist)) - self.assertEqual(buf, exp.encode("utf-8")) - - slist[2] = None - df2 = pd.DataFrame({ - 'a': pd.Series(slist, dtype='category'), - 'b': list(range(len(slist)))}) - - exp2 = exp.replace('tbl1 a="s2",b=2i\n', 'tbl1 b=2i\n') - buf2 = _dataframe(df2, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) - self.assertEqual(buf2, exp2.encode("utf-8")) - - def test_cat_i8_str(self): - self._test_cat_str(30) - self._test_cat_str(127) - - def test_cat_i16_str(self): - self._test_cat_str(128) - self._test_cat_str(4000) - self._test_cat_str(32767) - - def test_cat_i32_str(self): - self._test_cat_str(32768) - self._test_cat_str(40000) - - def test_all_nulls_pyobj_col(self): - df = pd.DataFrame({ - 'a': [None, pd.NA, float('nan')], - 'b': [1, 2, 3]}) - buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1 b=1i\n' + - b'tbl1 b=2i\n' + - b'tbl1 b=3i\n') - - def test_strided_numpy_column(self): - two_d = np.array([ - [1, 10], - [2, 20], - [3, 30]], dtype='int64') - col2 = two_d[:, 1] - col2.flags['WRITEABLE'] = 
False - - # Checking our test case setup. - mv = memoryview(col2) - self.assertEqual(mv.contiguous, False) - self.assertEqual(mv.strides, (16,)) - - df = pd.DataFrame(col2, copy=False) - df.columns = ['a'] - - with self.assertRaisesRegex( - qi.IngressError, "Bad column 'a': .*not.*contiguous"): - _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) - - def test_serializing_in_chunks(self): - df = pd.DataFrame({ - 'a': pd.Series(np.arange(30), dtype='int64'), - 'b': pd.Series(np.arange(30), dtype='Int64')}) - parts = [ - df.iloc[:10], - df.iloc[10:20], - df.iloc[20:]] - for index, part in enumerate(parts): - buf = _dataframe(part, table_name='tbl1', at = qi.ServerTimestamp) + def test_bad_category(self): + # We only support string categories + # (unless anyone asks for additional ones). + # We want to test others are rejected. + with self.assertRaisesRegex( + qi.IngressError, "Bad column 'a'.*got a category of .*int64"): + _dataframe( + self.version, + pd.DataFrame({'a': pd.Series([1, 2, 3, 2], dtype='category')}), + table_name='tbl1', at = qi.ServerTimestamp) + + def _test_cat_table(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + buf = _dataframe(self.version, df, table_name_col=0, at = qi.ServerTimestamp) + exp = ''.join( + f'{s} b={i}i\n' + for i, s in enumerate(slist)) + self.assertEqual(buf, exp.encode("utf-8")) + + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + with self.assertRaisesRegex( + qi.IngressError, 'Table name cannot be null'): + _dataframe(self.version, df2, table_name_col=0, at = qi.ServerTimestamp) + + def test_cat_i8_table(self): + self._test_cat_table(30) + self._test_cat_table(127) + + def test_cat_i16_table(self): + self._test_cat_table(128) + self._test_cat_table(4000) + self._test_cat_table(32767) + + def test_cat_i32_table(self): + 
self._test_cat_table(32768) + self._test_cat_table(40000) + + def _test_cat_symbol(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + buf = _dataframe(self.version, df, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) exp = ''.join( - f'tbl1 a={i}i,b={i}i\n' - for i in range(index * 10, (index + 1) * 10)) + f'tbl1,a={s} b={i}i\n' + for i, s in enumerate(slist)) self.assertEqual(buf, exp.encode("utf-8")) - def test_arrow_chunked_array(self): - # We build a table with chunked arrow arrays as columns. - chunks_a = [ - pa.array([1, 2, 3], type=pa.int16()), - pa.array([4, 5, 6], type=pa.int16()), - pa.array([], type=pa.int16()), - pa.array([7, 8, 9], type=pa.int16())] - chunked_a = pa.chunked_array(chunks_a) - chunks_b = [ - pa.array([10, 20], type=pa.int32()), - pa.array([], type=pa.int32()), - pa.array([30, 40, 50, 60], type=pa.int32()), - pa.array([70, 80, 90], type=pa.int32())] - chunked_b = pa.chunked_array(chunks_b) - arr_tab = pa.Table.from_arrays([chunked_a, chunked_b], names=['a', 'b']) - - # NOTE! - # This does *not* preserve the chunking of the arrow arrays. - df = arr_tab.to_pandas() - buf = _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) - exp = ( - b'tbl1 a=1i,b=10i\n' + - b'tbl1 a=2i,b=20i\n' + - b'tbl1 a=3i,b=30i\n' + - b'tbl1 a=4i,b=40i\n' + - b'tbl1 a=5i,b=50i\n' + - b'tbl1 a=6i,b=60i\n' + - b'tbl1 a=7i,b=70i\n' + - b'tbl1 a=8i,b=80i\n' + - b'tbl1 a=9i,b=90i\n') - self.assertEqual(buf, exp) - - if not hasattr(pd, 'ArrowDtype'): - # We don't have pandas ArrowDtype, so we can't test the rest. 
- return - - # To preserve the chunking we need to use a special pandas type: - pandarrow_a = pd.array(chunked_a, dtype='int16[pyarrow]') - pandarrow_b = pd.array(chunked_b, dtype='int32[pyarrow]') - df = pd.DataFrame({'a': pandarrow_a, 'b': pandarrow_b}) - - # Note that this dtype is experimental (currently), - # so we don't support it yet.. but we have everything in place should we - # need to, so - as for now - we just test that we raise a nice error. - with self.assertRaisesRegex( - qi.IngressError, - "Unsupported dtype int16\[pyarrow\] for column 'a'.*github"): - _dataframe(df, table_name='tbl1', at = qi.ServerTimestamp) - - @unittest.skipIf(not fastparquet, 'fastparquet not installed') - @with_tmp_dir - def test_parquet_roundtrip(self, tmpdir): - pa_parquet_path = tmpdir / 'test_pa.parquet' - fp_parquet_path = tmpdir / 'test_fp.parquet' - df = pd.DataFrame({ - 's': pd.Categorical(['a', 'b', 'a', 'c', 'a']), - 'a': pd.Series([1, 2, 3, 4, 5], dtype='int16'), - 'b': pd.Series([10, 20, 30, None, 50], dtype='UInt8'), - 'c': [0.5, float('nan'), 2.5, 3.5, None]}) - df.to_parquet(pa_parquet_path, engine='pyarrow') - df.to_parquet(fp_parquet_path, engine='fastparquet') - pa2pa_df = pd.read_parquet(pa_parquet_path, engine='pyarrow') - pa2fp_df = pd.read_parquet(pa_parquet_path, engine='fastparquet') - fp2pa_df = pd.read_parquet(fp_parquet_path, engine='pyarrow') - fp2fp_df = pd.read_parquet(fp_parquet_path, engine='fastparquet') - - exp_dtypes = ['category', 'int16', 'UInt8', 'float64'] - self.assertEqual(list(df.dtypes), exp_dtypes) - - def df_eq(exp_df, deser_df, exp_dtypes): - self.assertEqual(list(deser_df.dtypes), exp_dtypes) - if not exp_df.equals(deser_df): - print('\nexp_df:') - print(exp_df) - print('\ndeser_df:') - print(deser_df) - self.assertTrue(exp_df.equals(deser_df)) - - # fastparquet doesn't roundtrip with pyarrow parquet properly. - # It decays categories to object and UInt8 to float64. - # We need to set up special case expected results for that. 
- fallback_exp_dtypes = [ - np.dtype('O'), - np.dtype('int16'), - np.dtype('float64'), - np.dtype('float64')] - fallback_df = df.astype({'s': 'object', 'b': 'float64'}) - - df_eq(df, pa2pa_df, exp_dtypes) - df_eq(df, pa2fp_df, exp_dtypes) - df_eq(fallback_df, fp2pa_df, fallback_exp_dtypes) - df_eq(df, fp2fp_df, exp_dtypes) - - exp = ( - b'tbl1,s=a a=1i,b=10i,c=0.5\n' + - b'tbl1,s=b a=2i,b=20i,c=NaN\n' + - b'tbl1,s=a a=3i,b=30i,c=2.5\n' + - b'tbl1,s=c a=4i,c=3.5\n' + - b'tbl1,s=a a=5i,b=50i,c=NaN\n') - - fallback_exp = ( - b'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + - b'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + - b'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + - b'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + - b'tbl1 s="a",a=5i,b=50.0,c=NaN\n') - - self.assertEqual(_dataframe(df, table_name='tbl1', at=qi.ServerTimestamp), exp) - self.assertEqual(_dataframe(pa2pa_df, table_name='tbl1', at=qi.ServerTimestamp), exp) - self.assertEqual(_dataframe(pa2fp_df, table_name='tbl1', at=qi.ServerTimestamp), exp) - self.assertEqual(_dataframe(fp2pa_df, table_name='tbl1', at=qi.ServerTimestamp), fallback_exp) - self.assertEqual(_dataframe(fp2fp_df, table_name='tbl1', at=qi.ServerTimestamp), exp) + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + exp2 = exp.replace('tbl1,a=s2 b=2i\n', 'tbl1 b=2i\n') + buf2 = _dataframe(self.version, df2, table_name='tbl1', symbols=True, at = qi.ServerTimestamp) + self.assertEqual(buf2, exp2.encode("utf-8")) + + def test_cat_i8_symbol(self): + self._test_cat_symbol(30) + self._test_cat_symbol(127) + + def test_cat_i16_symbol(self): + self._test_cat_symbol(128) + self._test_cat_symbol(4000) + self._test_cat_symbol(32767) + + def test_cat_i32_symbol(self): + self._test_cat_symbol(32768) + self._test_cat_symbol(40000) + + def _test_cat_str(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + buf = 
_dataframe(self.version, df, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) + exp = ''.join( + f'tbl1 a="{s}",b={i}i\n' + for i, s in enumerate(slist)) + self.assertEqual(buf, exp.encode("utf-8")) + + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + exp2 = exp.replace('tbl1 a="s2",b=2i\n', 'tbl1 b=2i\n') + buf2 = _dataframe(self.version, df2, table_name='tbl1', symbols=False, at = qi.ServerTimestamp) + self.assertEqual(buf2, exp2.encode("utf-8")) + + def test_cat_i8_str(self): + self._test_cat_str(30) + self._test_cat_str(127) + + def test_cat_i16_str(self): + self._test_cat_str(128) + self._test_cat_str(4000) + self._test_cat_str(32767) + + def test_cat_i32_str(self): + self._test_cat_str(32768) + self._test_cat_str(40000) + + def test_all_nulls_pyobj_col(self): + df = pd.DataFrame({ + 'a': [None, pd.NA, float('nan')], + 'b': [1, 2, 3]}) + buf = _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 b=1i\n' + + b'tbl1 b=2i\n' + + b'tbl1 b=3i\n') + + def test_strided_numpy_column(self): + two_d = np.array([ + [1, 10], + [2, 20], + [3, 30]], dtype='int64') + col2 = two_d[:, 1] + col2.flags['WRITEABLE'] = False + + # Checking our test case setup. 
+ mv = memoryview(col2) + self.assertEqual(mv.contiguous, False) + self.assertEqual(mv.strides, (16,)) + + df = pd.DataFrame(col2, copy=False) + df.columns = ['a'] + + with self.assertRaisesRegex( + qi.IngressError, "Bad column 'a': .*not.*contiguous"): + _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) + + def test_serializing_in_chunks(self): + df = pd.DataFrame({ + 'a': pd.Series(np.arange(30), dtype='int64'), + 'b': pd.Series(np.arange(30), dtype='Int64')}) + parts = [ + df.iloc[:10], + df.iloc[10:20], + df.iloc[20:]] + for index, part in enumerate(parts): + buf = _dataframe(self.version, part, table_name='tbl1', at = qi.ServerTimestamp) + exp = ''.join( + f'tbl1 a={i}i,b={i}i\n' + for i in range(index * 10, (index + 1) * 10)) + self.assertEqual(buf, exp.encode("utf-8")) + + def test_arrow_chunked_array(self): + # We build a table with chunked arrow arrays as columns. + chunks_a = [ + pa.array([1, 2, 3], type=pa.int16()), + pa.array([4, 5, 6], type=pa.int16()), + pa.array([], type=pa.int16()), + pa.array([7, 8, 9], type=pa.int16())] + chunked_a = pa.chunked_array(chunks_a) + chunks_b = [ + pa.array([10, 20], type=pa.int32()), + pa.array([], type=pa.int32()), + pa.array([30, 40, 50, 60], type=pa.int32()), + pa.array([70, 80, 90], type=pa.int32())] + chunked_b = pa.chunked_array(chunks_b) + arr_tab = pa.Table.from_arrays([chunked_a, chunked_b], names=['a', 'b']) + + # NOTE! + # This does *not* preserve the chunking of the arrow arrays. + df = arr_tab.to_pandas() + buf = _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) + exp = ( + b'tbl1 a=1i,b=10i\n' + + b'tbl1 a=2i,b=20i\n' + + b'tbl1 a=3i,b=30i\n' + + b'tbl1 a=4i,b=40i\n' + + b'tbl1 a=5i,b=50i\n' + + b'tbl1 a=6i,b=60i\n' + + b'tbl1 a=7i,b=70i\n' + + b'tbl1 a=8i,b=80i\n' + + b'tbl1 a=9i,b=90i\n') + self.assertEqual(buf, exp) + + if not hasattr(pd, 'ArrowDtype'): + # We don't have pandas ArrowDtype, so we can't test the rest. 
+ return + + # To preserve the chunking we need to use a special pandas type: + pandarrow_a = pd.array(chunked_a, dtype='int16[pyarrow]') + pandarrow_b = pd.array(chunked_b, dtype='int32[pyarrow]') + df = pd.DataFrame({'a': pandarrow_a, 'b': pandarrow_b}) + + # Note that this dtype is experimental (currently), + # so we don't support it yet.. but we have everything in place should we + # need to, so - as for now - we just test that we raise a nice error. + with self.assertRaisesRegex( + qi.IngressError, + "Unsupported dtype int16\[pyarrow\] for column 'a'.*github"): + _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) + + @unittest.skipIf(not fastparquet, 'fastparquet not installed') + @with_tmp_dir + def test_parquet_roundtrip(self, tmpdir): + pa_parquet_path = tmpdir / 'test_pa.parquet' + fp_parquet_path = tmpdir / 'test_fp.parquet' + df = pd.DataFrame({ + 's': pd.Categorical(['a', 'b', 'a', 'c', 'a']), + 'a': pd.Series([1, 2, 3, 4, 5], dtype='int16'), + 'b': pd.Series([10, 20, 30, None, 50], dtype='UInt8'), + 'c': [0.5, float('nan'), 2.5, 3.5, None]}) + df.to_parquet(pa_parquet_path, engine='pyarrow') + df.to_parquet(fp_parquet_path, engine='fastparquet') + pa2pa_df = pd.read_parquet(pa_parquet_path, engine='pyarrow') + pa2fp_df = pd.read_parquet(pa_parquet_path, engine='fastparquet') + fp2pa_df = pd.read_parquet(fp_parquet_path, engine='pyarrow') + fp2fp_df = pd.read_parquet(fp_parquet_path, engine='fastparquet') + + exp_dtypes = ['category', 'int16', 'UInt8', 'float64'] + self.assertEqual(list(df.dtypes), exp_dtypes) + + def df_eq(exp_df, deser_df, exp_dtypes): + self.assertEqual(list(deser_df.dtypes), exp_dtypes) + if not exp_df.equals(deser_df): + print('\nexp_df:') + print(exp_df) + print('\ndeser_df:') + print(deser_df) + self.assertTrue(exp_df.equals(deser_df)) + + # fastparquet doesn't roundtrip with pyarrow parquet properly. + # It decays categories to object and UInt8 to float64. 
+ # We need to set up special case expected results for that. + fallback_exp_dtypes = [ + np.dtype('O'), + np.dtype('int16'), + np.dtype('float64'), + np.dtype('float64')] + fallback_df = df.astype({'s': 'object', 'b': 'float64'}) + + df_eq(df, pa2pa_df, exp_dtypes) + df_eq(df, pa2fp_df, exp_dtypes) + df_eq(fallback_df, fp2pa_df, fallback_exp_dtypes) + df_eq(df, fp2fp_df, exp_dtypes) + + exp = ( + b'tbl1,s=a a=1i,b=10i,c' + _float_binary_bytes(0.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1,s=b a=2i,b=20i,c' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1,s=a a=3i,b=30i,c' + _float_binary_bytes(2.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1,s=c a=4i,c' + _float_binary_bytes(3.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1,s=a a=5i,b=50i,c' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + + fallback_exp = ( + b'tbl1 s="a",a=1i,b' + _float_binary_bytes(10.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + + _float_binary_bytes(0.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 s="b",a=2i,b' + _float_binary_bytes(20.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 s="a",a=3i,b' + _float_binary_bytes(30.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + + _float_binary_bytes(2.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 s="c",a=4i,b' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + + _float_binary_bytes(3.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + + b'tbl1 s="a",a=5i,b' + 
_float_binary_bytes(50.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + + self.assertEqual(_dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp), exp) + self.assertEqual(_dataframe(self.version, pa2pa_df, table_name='tbl1', at=qi.ServerTimestamp), exp) + self.assertEqual(_dataframe(self.version, pa2fp_df, table_name='tbl1', at=qi.ServerTimestamp), exp) + self.assertEqual(_dataframe(self.version, fp2pa_df, table_name='tbl1', at=qi.ServerTimestamp), fallback_exp) + self.assertEqual(_dataframe(self.version, fp2fp_df, table_name='tbl1', at=qi.ServerTimestamp), exp) + +class TestPandasLineProtocolVersionV1(TestPandasBase.TestPandas): + name = 'init' + version = qi.LineProtocolVersion.LineProtocolVersionV1 + +class TestPandasLineProtocolVersionV2(TestPandasBase.TestPandas): + name = 'init' + version = qi.LineProtocolVersion.LineProtocolVersionV2 if __name__ == '__main__': if os.environ.get('TEST_QUESTDB_PROFILE') == '1': From 53f929d82b415f538dc9e2065200eee1de9164ff Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 12 May 2025 16:20:43 +0800 Subject: [PATCH 11/15] update dep. --- c-questdb-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index 8d739f1c..066d0a1b 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 8d739f1cf895bcf44b22db95eb0f5b50a2eebeb4 +Subproject commit 066d0a1b68eae297f29d9547081e045c2c4ea9d5 From 0106e0f4fa6398c842fb38fea39b364c342d290e Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 12 May 2025 22:47:36 +0800 Subject: [PATCH 12/15] dataframe support ndarray. 
--- c-questdb-client | 2 +- src/questdb/dataframe.pxi | 32 ++++++++++++-------------------- test/test_dataframe.py | 10 ++++++++++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 066d0a1b..55288974 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 066d0a1b68eae297f29d9547081e045c2c4ea9d5 +Subproject commit 5528897466140451cc8e0456bebd2ef932c2634a diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index 36bb9480..baf69081 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -10,7 +10,6 @@ cdef struct auto_flush_mode_t: int64_t row_count int64_t byte_count - cdef struct auto_flush_t: line_sender* sender auto_flush_mode_t mode @@ -27,7 +26,6 @@ cdef auto_flush_t auto_flush_blank() noexcept nogil: af.last_flush_ms = NULL return af - cdef bint should_auto_flush( const auto_flush_mode_t* af_mode, line_sender_buffer* ls_buf, @@ -462,7 +460,6 @@ cdef object _NUMPY_FLOAT32 = None cdef object _NUMPY_FLOAT64 = None cdef object _NUMPY_DATETIME64_NS = None cdef object _NUMPY_OBJECT = None -cdef object _NUMPY_ARRAY = None cdef object _PANDAS = None # module object cdef object _PANDAS_NA = None # pandas.NA cdef object _PYARROW = None # module object, if available or None @@ -495,7 +492,6 @@ cdef object _dataframe_may_import_deps(): global _NUMPY_FLOAT32 global _NUMPY_FLOAT64 global _NUMPY_DATETIME64_NS - global _NUMPY_ARRAY global _NUMPY_OBJECT if _NUMPY is not None: return @@ -522,7 +518,6 @@ cdef object _dataframe_may_import_deps(): _NUMPY_FLOAT32 = type(_NUMPY.dtype('float32')) _NUMPY_FLOAT64 = type(_NUMPY.dtype('float64')) _NUMPY_DATETIME64_NS = type(_NUMPY.dtype('datetime64[ns]')) - __NUMPY_ARRAY = _NUMPY.ndarray _NUMPY_OBJECT = type(_NUMPY.dtype('object')) _PANDAS = pandas _PANDAS_NA = pandas.NA @@ -945,6 +940,8 @@ cdef void_int _dataframe_series_sniff_pyobj( col.setup.source = col_source_t.col_source_float_pyobj elif PyUnicode_CheckExact(obj): 
col.setup.source = col_source_t.col_source_str_pyobj + elif PyArray_CheckExact(obj): + col.setup.source = col_source_t.col_source_array_numpy elif PyBytes_CheckExact(obj): raise IngressError( IngressErrorCode.BadDataFrame, @@ -1065,9 +1062,6 @@ cdef void_int _dataframe_resolve_source_and_buffers( _dataframe_is_supported_datetime(dtype)): col.setup.source = col_source_t.col_source_dt64ns_tz_arrow _dataframe_series_as_arrow(pandas_col, col) - elif isinstance(dtype, _NUMPY_ARRAY): - col.setup.source = col_source_t.col_source_array_numpy - _dataframe_series_as_pybuf(pandas_col, col) elif isinstance(dtype, _NUMPY_OBJECT): _dataframe_series_sniff_pyobj(pandas_col, col) else: @@ -2040,29 +2034,27 @@ cdef void_int _dataframe_serialize_cell_column_array__array_numpy( qdb_pystr_buf* b, col_t* col, PyThreadState** gs) except -1: - - cdef cnp.ndarray arr = col.cursor.chunk.buffers[1] + cdef PyObject** access = col.cursor.chunk.buffers[1] + cdef PyObject* cell = access[col.cursor.offset] + cdef cnp.ndarray arr = cell cdef PyArray_Descr* dtype_ptr = cnp.PyArray_DESCR(arr) if dtype_ptr.type_num != NPY_FLOAT64: raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Only support float64 array, got: %s' % str(arr.dtype)) cdef: size_t rank = cnp.PyArray_NDIM(arr) - const uint8_t * data_ptr - line_sender_error * err = NULL - + const uint8_t* data_ptr if rank == 0: raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') - if rank > MAX_ARRAY_DIM: - raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') + if rank > 32: + raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions 32, got {rank}') data_ptr = cnp.PyArray_DATA(arr) - + cdef line_sender_error* err = NULL if not line_sender_buffer_column_f64_arr( - ls_buf, col.name, rank, cnp.PyArray_DIMS(arr), - cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): + ls_buf, col.name, rank, 
cnp.PyArray_DIMS(arr), + cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): raise c_err_to_py(err) - cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow( line_sender_buffer* ls_buf, qdb_pystr_buf* b, @@ -2220,7 +2212,7 @@ cdef void_int _dataframe_serialize_cell( elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_numpy: _dataframe_serialize_cell_column_ts__dt64ns_numpy(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_column_array__array_numpy: - _dataframe_serialize_cell_column_ts__dt64ns_numpy(ls_buf, b, col, gs) + _dataframe_serialize_cell_column_array__array_numpy(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_tz_arrow: _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow(ls_buf, b, col, gs) elif dc == col_dispatch_code_t.col_dispatch_code_at__dt64ns_numpy: diff --git a/test/test_dataframe.py b/test/test_dataframe.py index d55c180e..b1810350 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -1607,6 +1607,16 @@ def df_eq(exp_df, deser_df, exp_dtypes): self.assertEqual(_dataframe(self.version, fp2pa_df, table_name='tbl1', at=qi.ServerTimestamp), fallback_exp) self.assertEqual(_dataframe(self.version, fp2fp_df, table_name='tbl1', at=qi.ServerTimestamp), exp) + def test_f64_np_array(self): + df = pd.DataFrame({ + 'a': [np.array([1.0], np.float64), np.array([2.0], np.float64), np.array([3.0], np.float64)]}) + buf = _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 b' + _array_binary_bytes(np.array([1.0], np.float64)) + b'\n' + + b'tbl1 b' + _array_binary_bytes(np.array([2.0], np.float64)) + b'\n' + + b'tbl1 b' + _array_binary_bytes(np.array([3.0], np.float64)) + b'\n') + class TestPandasLineProtocolVersionV1(TestPandasBase.TestPandas): name = 'init' version = qi.LineProtocolVersion.LineProtocolVersionV1 From dc613d5cb0501d2efc3dde7e3bcb8b4706ff7b28 Mon Sep 17 00:00:00 2001 From: victor 
Date: Mon, 12 May 2025 23:46:15 +0800 Subject: [PATCH 13/15] data frame support ndarray. --- c-questdb-client | 2 +- src/questdb/dataframe.pxi | 5 ++++- test/test_dataframe.py | 19 +++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 55288974..b091bf62 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 5528897466140451cc8e0456bebd2ef932c2634a +Subproject commit b091bf62cede3153a74bb3f09ad104026b5b1e7f diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index baf69081..8e3d85af 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -10,6 +10,7 @@ cdef struct auto_flush_mode_t: int64_t row_count int64_t byte_count + cdef struct auto_flush_t: line_sender* sender auto_flush_mode_t mode @@ -2044,15 +2045,17 @@ cdef void_int _dataframe_serialize_cell_column_array__array_numpy( cdef: size_t rank = cnp.PyArray_NDIM(arr) const uint8_t* data_ptr + line_sender_error * err = NULL if rank == 0: raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') if rank > 32: raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions 32, got {rank}') data_ptr = cnp.PyArray_DATA(arr) - cdef line_sender_error* err = NULL + if not line_sender_buffer_column_f64_arr( ls_buf, col.name, rank, cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): + _ensure_has_gil(gs) raise c_err_to_py(err) cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow( diff --git a/test/test_dataframe.py b/test/test_dataframe.py index b1810350..19f7447f 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -1610,12 +1610,19 @@ def df_eq(exp_df, deser_df, exp_dtypes): def test_f64_np_array(self): df = pd.DataFrame({ 'a': [np.array([1.0], np.float64), np.array([2.0], np.float64), np.array([3.0], np.float64)]}) - buf = _dataframe(self.version, df, 
table_name='tbl1', at = qi.ServerTimestamp) - self.assertEqual( - buf, - b'tbl1 b' + _array_binary_bytes(np.array([1.0], np.float64)) + b'\n' + - b'tbl1 b' + _array_binary_bytes(np.array([2.0], np.float64)) + b'\n' + - b'tbl1 b' + _array_binary_bytes(np.array([3.0], np.float64)) + b'\n') + + if self.version == qi.LineProtocolVersion.LineProtocolVersionV1: + with self.assertRaisesRegex( + qi.IngressError, + "line protocol version v1 does not support array datatype"): + _ = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + else: + buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) + self.assertEqual( + buf, + b'tbl1 a=' + _array_binary_bytes(np.array([1.0], np.float64)) + b'\n' + + b'tbl1 a=' + _array_binary_bytes(np.array([2.0], np.float64)) + b'\n' + + b'tbl1 a=' + _array_binary_bytes(np.array([3.0], np.float64)) + b'\n') class TestPandasLineProtocolVersionV1(TestPandasBase.TestPandas): name = 'init' From ccec47fc893ac096fa47f7dc8828343d8bdd4d40 Mon Sep 17 00:00:00 2001 From: victor Date: Tue, 13 May 2025 08:30:53 +0800 Subject: [PATCH 14/15] fix tests and typo. 
--- src/questdb/ingress.pyx | 2 +- test/test.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 0fff3b0b..2e99a4cf 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -2228,7 +2228,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object default_line_protocol_version=None, # Default off + object default_line_protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ diff --git a/test/test.py b/test/test.py index c99a0366..a351d4b8 100755 --- a/test/test.py +++ b/test/test.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 import sys -from examples import buffer - sys.dont_write_bytecode = True import os import unittest @@ -35,7 +33,8 @@ pd = None if pd is not None: - from test_dataframe import TestPandas + from test_dataframe import TestPandasLineProtocolVersionV1 + from test_dataframe import TestPandasLineProtocolVersionV2 else: class TestNoPandas(unittest.TestCase): def test_no_pandas(self): @@ -245,7 +244,7 @@ def test_array_edge_cases(self): buf.row('invalid_table', columns={'col': complex_arr}, at=qi.ServerTimestamp) # large array - with self.assertRaisesRegex(qi.IngressError, "Array total elem size overflow"): + with self.assertRaisesRegex(qi.IngressError, "Array buffer size too big"): large_arr = np.arange(2147483648, dtype=np.float64) buf.row('large_array', columns={'col': large_arr}, at=qi.ServerTimestamp) @@ -1229,7 +1228,7 @@ def test_array_error_cases(self): at=qi.TimestampNanos(11111)) # large array - with self.assertRaisesRegex(qi.IngressError, "Array total elem size overflow"): + with self.assertRaisesRegex(qi.IngressError, "Array buffer size too big:"): large_arr = np.arange(2147483648, dtype=np.float64) with HttpServer() as server, self.builder('http', 
'localhost', server.port) as sender: sender.row( From 6dd29221acf9c198c3da7c47999ce04a15609511 Mon Sep 17 00:00:00 2001 From: victor Date: Fri, 16 May 2025 18:46:23 +0800 Subject: [PATCH 15/15] optimize python client. --- c-questdb-client | 2 +- src/questdb/dataframe.pxi | 8 +- src/questdb/ingress.pyi | 12 +-- src/questdb/ingress.pyx | 158 ++++++++++++++++++------------------ src/questdb/line_sender.pxd | 35 ++++---- test/mock_server.py | 2 +- test/test.py | 74 +++++++++-------- test/test_dataframe.py | 128 ++++++++++++++--------------- 8 files changed, 212 insertions(+), 207 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index b091bf62..b9e023f3 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit b091bf62cede3153a74bb3f09ad104026b5b1e7f +Subproject commit b9e023f3b2fb1d09ab4bf777213f8599be69694b diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index 8e3d85af..1fb5375b 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -2044,14 +2044,8 @@ cdef void_int _dataframe_serialize_cell_column_array__array_numpy( 'Only support float64 array, got: %s' % str(arr.dtype)) cdef: size_t rank = cnp.PyArray_NDIM(arr) - const uint8_t* data_ptr + const uint8_t* data_ptr = cnp.PyArray_DATA(arr) line_sender_error * err = NULL - if rank == 0: - raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') - if rank > 32: - raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions 32, got {rank}') - data_ptr = cnp.PyArray_DATA(arr) - if not line_sender_buffer_column_f64_arr( ls_buf, col.name, rank, cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index 16b6daed..61ab034a 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -58,13 +58,13 @@ class IngressErrorCode(Enum): ArrayLargeDimError = ... ArrayInternalError = ... 
ArrayWriteToBufferError = ... - LineProtocolVersionError = ... + ProtocolVersionError = ... BadDataFrame = ... -class LineProtocolVersion(Enum): +class ProtocolVersion(Enum): """Line protocol version.""" - LineProtocolVersionV1 = ... - LineProtocolVersionV2 = ... + ProtocolVersionV1 = ... + ProtocolVersionV2 = ... class IngressError(Exception): """An error whilst using the ``Sender`` or constructing its ``Buffer``.""" @@ -310,7 +310,7 @@ class Buffer: """ - def __init__(self, init_buf_size: int = 65536, max_name_len: int = 127, line_protocol_version: LineProtocolVersion = LineProtocolVersion.LineProtocolVersionV2): + def __init__(self, init_buf_size: int = 65536, max_name_len: int = 127, protocol_version: ProtocolVersion = ProtocolVersion.ProtocolVersionV2): """ Create a new buffer with the an initial capacity and max name length. :param int init_buf_size: Initial capacity of the buffer in bytes. @@ -938,7 +938,7 @@ class Sender: Time interval threshold for the auto-flush logic, or None if disabled. """ - def default_line_protocol_version(self) -> LineProtocolVersion: + def default_protocol_version(self) -> ProtocolVersion: """ Returns the QuestDB server's recommended default line protocol version. 
""" diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 2e99a4cf..40148016 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -131,17 +131,17 @@ class IngressErrorCode(Enum): ArrayLargeDimError = line_sender_error_array_large_dim ArrayInternalError = line_sender_error_array_view_internal_error ArrayWriteToBufferError = line_sender_error_array_view_write_to_buffer_error - LineProtocolVersionError = line_sender_error_line_protocol_version_error - BadDataFrame = line_sender_error_line_protocol_version_error + 1 + ProtocolVersionError = line_sender_error_protocol_version_error + BadDataFrame = line_sender_error_protocol_version_error + 1 def __str__(self) -> str: """Return the name of the enum.""" return self.name -class LineProtocolVersion(Enum): +class ProtocolVersion(Enum): """Line protocol version.""" - LineProtocolVersionV1 = line_protocol_version_1 - LineProtocolVersionV2 = line_protocol_version_2 + ProtocolVersionV1 = protocol_version_1 + ProtocolVersionV2 = protocol_version_2 class IngressError(Exception): """An error whilst using the ``Sender`` or constructing its ``Buffer``.""" @@ -184,8 +184,8 @@ cdef inline object c_err_code_to_py(line_sender_error_code code): return IngressErrorCode.ArrayInternalError elif code == line_sender_error_array_view_write_to_buffer_error: return IngressErrorCode.ArrayWriteToBufferError - elif code == line_sender_error_line_protocol_version_error: - return IngressErrorCode.LineProtocolVersionError + elif code == line_sender_error_protocol_version_error: + return IngressErrorCode.ProtocolVersionError else: raise ValueError('Internal error converting error code.') @@ -644,6 +644,13 @@ cdef class SenderTransaction: IngressErrorCode.InvalidTimestamp, "`at` must be of type TimestampNanos, datetime, or ServerTimestamp" ) + + if self._sender._buffer is None: + raise IngressError( + IngressErrorCode.InvalidApiCall, + "row() can\'t be called: Not connected." 
+ ) + self._sender._buffer._row( False, # allow_auto_flush self._table_name, @@ -668,6 +675,11 @@ cdef class SenderTransaction: IngressErrorCode.InvalidTimestamp, "`at` must be of type TimestampNanos, datetime, or ServerTimestamp" ) + if self._sender._buffer is None: + raise IngressError( + IngressErrorCode.InvalidApiCall, + "dataframe() can\'t be called: Not connected." + ) _dataframe( auto_flush_blank(), self._sender._buffer._impl, @@ -708,7 +720,8 @@ cdef class SenderTransaction: raise IngressError( IngressErrorCode.InvalidApiCall, 'Transaction already completed, can\'t rollback.') - self._sender._buffer.clear() + if self._sender._buffer is not None: + self._sender._buffer.clear() self._sender._in_txn = False self._complete = True @@ -782,26 +795,24 @@ cdef class Buffer: cdef size_t _max_name_len cdef object _row_complete_sender - def __cinit__(self, init_buf_size: int=65536, max_name_len: int=127, line_protocol_version: LineProtocolVersion=LineProtocolVersion.LineProtocolVersionV2): + def __cinit__(self, protocol_version: ProtocolVersion, init_buf_size: int=65536, max_name_len: int=127): """ Create a new buffer with the an initial capacity and max name length. :param int init_buf_size: Initial capacity of the buffer in bytes. :param int max_name_len: Maximum length of a table or column name. 
""" - self._cinit_impl(init_buf_size, max_name_len, line_protocol_version.value) + self._cinit_impl(init_buf_size, max_name_len, protocol_version.value) - cdef inline _cinit_impl(self, size_t init_buf_size, size_t max_name_len, line_protocol_version version): - self._impl = line_sender_buffer_with_max_name_len(max_name_len) + cdef inline _cinit_impl(self, size_t init_buf_size, size_t max_name_len, protocol_version version): + self._impl = line_sender_buffer_with_max_name_len(max_name_len, version) self._b = qdb_pystr_buf_new() line_sender_buffer_reserve(self._impl, init_buf_size) cdef line_sender_error* err = NULL - if not line_sender_buffer_set_line_protocol_version(self._impl, version, &err): - raise c_err_to_py(err) - self._init_buf_size = init_buf_size self._max_name_len = max_name_len self._row_complete_sender = None + def __dealloc__(self): self._row_complete_sender = None qdb_pystr_buf_free(self._b) @@ -938,15 +949,9 @@ cdef class Buffer: raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Only support float64 array, got: %s' % str(arr.dtype)) cdef: size_t rank = cnp.PyArray_NDIM(arr) - const uint8_t * data_ptr + const uint8_t * data_ptr = cnp.PyArray_DATA(arr) line_sender_error * err = NULL - if rank == 0: - raise IngressError(IngressErrorCode.ArrayWriteToBufferError, 'Zero-dimensional arrays are not supported') - if rank > MAX_ARRAY_DIM: - raise IngressError(IngressErrorCode.ArrayLargeDimError, f'Max dimensions {MAX_ARRAY_DIM}, got {rank}') - data_ptr = cnp.PyArray_DATA(arr) - if not line_sender_buffer_column_f64_arr( self._impl, c_name, rank, cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), data_ptr, cnp.PyArray_NBYTES(arr), &err): @@ -1756,7 +1761,6 @@ cdef object parse_conf_str( 'auto_flush_rows': str, 'auto_flush_bytes': str, 'auto_flush_interval': str, - 'disable_line_protocol_version': str, 'init_buf_size': int, 'max_name_len': int, } @@ -1787,8 +1791,6 @@ cdef class Sender: cdef size_t _init_buf_size cdef size_t _max_name_len cdef bint 
_in_txn - cdef line_protocol_version _line_protocol_version - cdef bint _auto_detect_line_protocol_version cdef void_int _set_sender_fields( self, @@ -1812,7 +1814,7 @@ cdef class Sender: object auto_flush_rows, object auto_flush_bytes, object auto_flush_interval, - str default_line_protocol_version, + str protocol_version, object init_buf_size, object max_name_len) except -1: """ @@ -1873,6 +1875,19 @@ cdef class Sender: if not line_sender_opts_token_y(self._opts, c_token_y, &err): raise c_err_to_py(err) + if protocol_version is not None: + if protocol_version == "1": + if not line_sender_opts_protocol_version(self._opts, protocol_version_1, &err): + raise c_err_to_py(err) + elif protocol_version == "2": + if not line_sender_opts_protocol_version(self._opts, protocol_version_2, &err): + raise c_err_to_py(err) + elif protocol_version != "auto": + raise IngressError( + IngressErrorCode.ConfigError, + '"protocol_version" must be None, "auto", "1" or "2"' + + f' not {protocol_version!r}') + if auth_timeout is not None: if isinstance(auth_timeout, int): c_auth_timeout = auth_timeout @@ -1960,40 +1975,8 @@ cdef class Sender: auto_flush_interval, &self._auto_flush_mode) - # default line protocol version is v2 for tcp/tcps and auto-detection for http/https - if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: - self._line_protocol_version = line_protocol_version_2 - self._auto_detect_line_protocol_version = False - else: - self._auto_detect_line_protocol_version = True - - if default_line_protocol_version is not None: - if default_line_protocol_version == "v1": - self._line_protocol_version = line_protocol_version_1 - self._auto_detect_line_protocol_version = False - if not line_sender_opts_disable_line_protocol_validation(self._opts, &err): - raise c_err_to_py(err) - elif default_line_protocol_version == "v2": - self._line_protocol_version = line_protocol_version_2 - self._auto_detect_line_protocol_version = False - if not 
line_sender_opts_disable_line_protocol_validation(self._opts, &err): - raise c_err_to_py(err) - elif default_line_protocol_version != "auto": - raise IngressError( - IngressErrorCode.ConfigError, - '"default_line_protocol_version" must be None, "auto", "v1" or "v2"' + - f'not {default_line_protocol_version!r}') - self._init_buf_size = init_buf_size or 65536 self._max_name_len = max_name_len or 127 - - # self._buffer will be constructed after establish connection for http/https. - if self._c_protocol == line_sender_protocol_tcp or self._c_protocol == line_sender_protocol_tcps: - self._buffer = Buffer( - init_buf_size=self._init_buf_size, - max_name_len=self._max_name_len, - line_protocol_version=LineProtocolVersion(self._line_protocol_version)) - self._last_flush_ms = calloc(1, sizeof(int64_t)) def __cinit__(self): @@ -2031,7 +2014,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object default_line_protocol_version=None, # Default auto + object protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 @@ -2075,7 +2058,7 @@ cdef class Sender: auto_flush_rows, auto_flush_bytes, auto_flush_interval, - default_line_protocol_version, + protocol_version, init_buf_size, max_name_len) finally: @@ -2103,7 +2086,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object default_line_protocol_version=None, # Default auto + object protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2158,7 +2141,7 @@ cdef class Sender: 'auto_flush_rows': auto_flush_rows, 'auto_flush_bytes': auto_flush_bytes, 'auto_flush_interval': auto_flush_interval, - 'default_line_protocol_version': 
default_line_protocol_version, + 'protocol_version': protocol_version, 'init_buf_size': init_buf_size, 'max_name_len': max_name_len, }.items(): @@ -2199,7 +2182,7 @@ cdef class Sender: params.get('auto_flush_rows'), params.get('auto_flush_bytes'), params.get('auto_flush_interval'), - params.get('default_line_protocol_version'), + params.get('protocol_version'), params.get('init_buf_size'), params.get('max_name_len')) @@ -2228,7 +2211,7 @@ cdef class Sender: object auto_flush_rows=None, # Default 75000 (HTTP) or 600 (TCP) object auto_flush_bytes=None, # Default off object auto_flush_interval=None, # Default 1000 milliseconds - object default_line_protocol_version=None, # Default auto + object protocol_version=None, # Default auto object init_buf_size=None, # 64KiB object max_name_len=None): # 127 """ @@ -2268,7 +2251,7 @@ cdef class Sender: auto_flush_rows=auto_flush_rows, auto_flush_bytes=auto_flush_bytes, auto_flush_interval=auto_flush_interval, - default_line_protocol_version=default_line_protocol_version, + protocol_version=protocol_version, init_buf_size=init_buf_size, max_name_len=max_name_len) @@ -2281,9 +2264,9 @@ cdef class Sender: `max_name_len`. 
""" return Buffer( + protocol_version=self.default_protocol_version, init_buf_size=self._init_buf_size, - max_name_len=self._max_name_len, - line_protocol_version=self.default_line_protocol_version()) + max_name_len=self._max_name_len) @property def init_buf_size(self) -> int: @@ -2338,14 +2321,13 @@ cdef class Sender: return None return timedelta(milliseconds=self._auto_flush_mode.interval) - def default_line_protocol_version(self) -> LineProtocolVersion: - if self._auto_detect_line_protocol_version: - if self._impl == NULL: - raise IngressError( - IngressErrorCode.InvalidApiCall, - 'default_line_protocol_version() can\'t be called: Not connected.') - return LineProtocolVersion(line_sender_default_line_protocol_version(self._impl)) - return LineProtocolVersion(self._line_protocol_version) + @property + def default_protocol_version(self) -> ProtocolVersion: + if self._impl == NULL: + raise IngressError( + IngressErrorCode.InvalidApiCall, + 'default_protocol_version() can\'t be called: Not connected.') + return ProtocolVersion(line_sender_default_protocol_version(self._impl)) def establish(self): """ @@ -2373,9 +2355,9 @@ cdef class Sender: if self._buffer is None: self._buffer = Buffer( + protocol_version=self.default_protocol_version, init_buf_size=self._init_buf_size, - max_name_len=self._max_name_len, - line_protocol_version=self.default_line_protocol_version()) + max_name_len=self._max_name_len) line_sender_opts_free(self._opts) self._opts = NULL @@ -2399,7 +2381,10 @@ cdef class Sender: Also see :func:`Sender.__len__`. """ - return bytes(self._buffer) + if self._buffer is None: + return b'' + else: + return bytes(self._buffer) def __len__(self) -> int: """ @@ -2407,7 +2392,10 @@ cdef class Sender: Equivalent (but cheaper) to ``len(bytes(sender))``. 
""" - return len(self._buffer) + if self._buffer is None: + return 0 + else: + return len(self._buffer) def transaction(self, table_name: str): """ @@ -2440,6 +2428,12 @@ cdef class Sender: IngressErrorCode.InvalidTimestamp, "`at` must be of type TimestampNanos, datetime, or ServerTimestamp" ) + if self._buffer is None: + raise IngressError( + IngressErrorCode.InvalidApiCall, + "row() can\'t be called: Not connected." + ) + self._buffer.row(table_name, symbols=symbols, columns=columns, at=at) return self @@ -2505,6 +2499,12 @@ cdef class Sender: af.sender = self._impl af.mode = self._auto_flush_mode af.last_flush_ms = self._last_flush_ms + + if self._buffer is None: + raise IngressError( + IngressErrorCode.InvalidApiCall, + "dataframe() can\'t be called: Not connected." + ) _dataframe( af, self._buffer._impl, diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index 43f17033..f5ce3d67 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -43,7 +43,7 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error_array_large_dim line_sender_error_array_view_internal_error line_sender_error_array_view_write_to_buffer_error - line_sender_error_line_protocol_version_error + line_sender_error_protocol_version_error cdef enum line_sender_protocol: line_sender_protocol_tcp, @@ -51,9 +51,9 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_protocol_http, line_sender_protocol_https, - cdef enum line_protocol_version: - line_protocol_version_1 = 1, - line_protocol_version_2 = 2, + cdef enum protocol_version: + protocol_version_1 = 1, + protocol_version_2 = 2, cdef enum line_sender_ca: line_sender_ca_webpki_roots, @@ -130,16 +130,12 @@ cdef extern from "questdb/ingress/line_sender.h": pass line_sender_buffer* line_sender_buffer_new( + protocol_version version, ) noexcept nogil line_sender_buffer* line_sender_buffer_with_max_name_len( - size_t max_name_len - ) noexcept nogil - - bint 
line_sender_buffer_set_line_protocol_version( - line_sender_buffer* buffer, - line_protocol_version version, - line_sender_error** err_out + size_t max_name_len, + protocol_version version, ) noexcept nogil void line_sender_buffer_free( @@ -339,8 +335,9 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil - bint line_sender_opts_disable_line_protocol_validation( + bint line_sender_opts_protocol_version( line_sender_opts* opts, + protocol_version version, line_sender_error** err_out ) noexcept nogil @@ -414,8 +411,18 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil - line_protocol_version line_sender_default_line_protocol_version( - const line_sender * sender); + protocol_version line_sender_default_protocol_version( + const line_sender * sender + ) noexcept nogil + + line_sender_buffer* line_sender_buffer_new_for_sender( + const line_sender * sender + ) noexcept nogil + + line_sender_buffer* line_sender_buffer_with_max_name_len_for_sender( + const line_sender * sender, + size_t max_name_len + ) noexcept nogil bint line_sender_must_close( const line_sender* sender diff --git a/test/mock_server.py b/test/mock_server.py index d9db5a11..a654923b 100644 --- a/test/mock_server.py +++ b/test/mock_server.py @@ -114,7 +114,7 @@ def close(self): def __exit__(self, _ex_type, _ex_value, _ex_tb): self.close() -SETTINGS_WITH_PROTOCOL_VERSION = b'{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "line.proto.default.version": 2, "line.proto.support.versions": [1, 2], "ilp.proto.transports": [ "tcp", "http" ], "posthog.enabled": false, "posthog.api.key": null }' +SETTINGS_WITH_PROTOCOL_VERSION = b'{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "line.proto.support.versions": [1, 2], "ilp.proto.transports": [ "tcp", "http", "tcps", "https"], "posthog.enabled": false, "posthog.api.key": null }' 
SETTINGS_WITHOUT_PROTOCOL_VERSION = b'{ "release.type": "OSS", "release.version": "[DEVELOPMENT]", "acl.enabled": false, "posthog.enabled": false, "posthog.api.key": null }' class HttpServer: diff --git a/test/test.py b/test/test.py index a351d4b8..ad32e682 100755 --- a/test/test.py +++ b/test/test.py @@ -33,8 +33,8 @@ pd = None if pd is not None: - from test_dataframe import TestPandasLineProtocolVersionV1 - from test_dataframe import TestPandasLineProtocolVersionV2 + from test_dataframe import TestPandasProtocolVersionV1 + from test_dataframe import TestPandasProtocolVersionV2 else: class TestNoPandas(unittest.TestCase): def test_no_pandas(self): @@ -249,7 +249,7 @@ def test_array_edge_cases(self): buf.row('large_array', columns={'col': large_arr}, at=qi.ServerTimestamp) def test_float_line_protocol_v1(self): - buf = qi.Buffer(line_protocol_version=qi.LineProtocolVersion.LineProtocolVersionV1) + buf = qi.Buffer(protocol_version=qi.ProtocolVersion.ProtocolVersionV1) buf.row('tbl1', columns={'num': 1.2345678901234567}, at=qi.ServerTimestamp) self.assertEqual(bytes(buf), b'tbl1 num' + _float_binary_bytes(1.2345678901234567, True) + b'\n') @@ -348,7 +348,8 @@ def test_basic(self): 'tcp', 'localhost', server.port, - bind_interface='0.0.0.0') as sender: + bind_interface='0.0.0.0', + protocol_version='2') as sender: server.accept() self.assertEqual(server.recv(), []) sender.row( @@ -396,9 +397,8 @@ def test_connect_close(self): def test_row_before_connect(self): try: sender = self.builder('tcp', 'localhost', 12345) - sender.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) with self.assertRaisesRegex(qi.IngressError, 'Not connected'): - sender.flush() + sender.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) finally: sender.close() @@ -458,7 +458,7 @@ def test_flush_4(self): sender.flush(buffer=None, clear=False) def test_two_rows_explicit_buffer(self): - with Server() as server, self.builder('tcp', 'localhost', server.port) as sender: + with 
Server() as server, self.builder('tcp', 'localhost', server.port, protocol_version='2') as sender: server.accept() self.assertEqual(server.recv(), []) buffer = sender.new_buffer() @@ -482,14 +482,14 @@ def test_two_rows_explicit_buffer(self): self.assertEqual(msgs, bexp) def test_independent_buffer(self): - buf = qi.Buffer() + buf = qi.Buffer(protocol_version=qi.ProtocolVersion.ProtocolVersionV2) buf.row('tbl1', symbols={'sym1': 'val1'}, at=qi.ServerTimestamp) exp = b'tbl1,sym1=val1\n' self.assertEqual(bytes(buf), exp) with Server() as server1, Server() as server2: - with self.builder('tcp', 'localhost', server1.port) as sender1, \ - self.builder('tcp', 'localhost', server2.port) as sender2: + with self.builder('tcp', 'localhost', server1.port, protocol_version='2') as sender1, \ + self.builder('tcp', 'localhost', server2.port, protocol_version='2') as sender2: server1.accept() server2.accept() @@ -619,7 +619,7 @@ def test_dont_flush_on_exception(self): @unittest.skipIf(not pd, 'pandas not installed') def test_dataframe(self): with Server() as server: - with self.builder('tcp', 'localhost', server.port) as sender: + with self.builder('tcp', 'localhost', server.port, protocol_version='2') as sender: server.accept() df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) sender.dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) @@ -640,7 +640,8 @@ def test_dataframe_auto_flush(self): server.port, auto_flush_bytes=25, auto_flush_rows=False, - auto_flush_interval=False) as sender: + auto_flush_interval=False, + protocol_version='2') as sender: server.accept() df = pd.DataFrame({'a': [100000, 2], 'b': [3.0, 4.0]}) sender.dataframe(df, table_name='tbl1', at=qi.ServerTimestamp) @@ -672,17 +673,18 @@ def test_dataframe_auto_flush(self): sender.dataframe(df.head(1), table_name='tbl1', at=qi.ServerTimestamp) def test_new_buffer(self): - sender = self.builder( + with Server() as server: + with self.builder( protocol='tcp', host='localhost', - port=9009, + port=server.port, 
init_buf_size=1024, - max_name_len=10) - buffer = sender.new_buffer() - self.assertEqual(buffer.init_buf_size, 1024) - self.assertEqual(buffer.max_name_len, 10) - self.assertEqual(buffer.init_buf_size, sender.init_buf_size) - self.assertEqual(buffer.max_name_len, sender.max_name_len) + max_name_len=10) as sender: + buffer = sender.new_buffer() + self.assertEqual(buffer.init_buf_size, 1024) + self.assertEqual(buffer.max_name_len, 10) + self.assertEqual(buffer.init_buf_size, sender.init_buf_size) + self.assertEqual(buffer.max_name_len, sender.max_name_len) def test_connect_after_close(self): with Server() as server, self.builder('tcp', 'localhost', server.port) as sender: @@ -1010,7 +1012,7 @@ def test_http_request_min_throughput(self): 'localhost', server.port, request_timeout=1000, - default_line_protocol_version='v2', + protocol_version='2', # request_timeout is sufficiently high since it's also used as a connect timeout and we want to # survive hiccups on CI. it should be lower than the server delay though to actually test the # effect of request_min_throughput. 
@@ -1047,7 +1049,7 @@ def test_http_request_timeout(self): server.port, retry_timeout=0, request_min_throughput=0, # disable - default_line_protocol_version='v2', + protocol_version='2', request_timeout=datetime.timedelta(milliseconds=5)) as sender: # wait for 10ms in the server to simulate a slow response server.responses.append((20, 200, 'text/plain', b'OK')) @@ -1055,13 +1057,13 @@ def test_http_request_timeout(self): with self.assertRaisesRegex(qi.IngressError, 'timeout: per call'): sender.flush() - def test_wrong_config_default_line_protocol_version(self): - with self.assertRaisesRegex(qi.IngressError, '"default_line_protocol_version" must be None, "auto", "v1" or "v2"not \'v3\''): + def test_wrong_config_protocol_version(self): + with self.assertRaisesRegex(qi.IngressError, '"protocol_version" must be None, "auto", "1" or "2" not \'3\''): self.builder( 'http', 'localhost', 0, - default_line_protocol_version='v3') + protocol_version='3') def test_http_server_not_serve(self): with self.assertRaisesRegex(qi.IngressError, 'Failed to detect server\'s line protocol version, settings url: http://localhost:1234/settings'): @@ -1069,7 +1071,7 @@ def test_http_server_not_serve(self): 'http', 'localhost', 1234, - default_line_protocol_version='auto') as sender: + protocol_version='auto') as sender: sender.row('tbl1', columns={'x': 42}) def test_sender_connect_mock_old_server1(self): @@ -1114,7 +1116,7 @@ def test_sender_connect_mock_old_server3(self): self.assertEqual(server.requests[0], exp) def test_disable_line_protocol_validation(self): - with HttpServer() as server, self.builder('http', 'localhost', server.port, default_line_protocol_version='v1') as sender: + with HttpServer() as server, self.builder('http', 'localhost', server.port, protocol_version='1') as sender: buffer = sender.new_buffer() buffer.row( 'line_sender_buffer', @@ -1128,8 +1130,8 @@ def test_disable_line_protocol_validation(self): self.assertEqual(len(server.requests), 1) 
self.assertEqual(server.requests[0], exp) - def test_line_protocol_validation_on_tcp(self): - with Server() as server, self.builder('tcp', 'localhost', server.port, default_line_protocol_version='v1') as sender: + def test_line_protocol_version_on_tcp(self): + with Server() as server, self.builder('tcp', 'localhost', server.port, protocol_version='1') as sender: server.accept() self.assertEqual(server.recv(), []) buffer = sender.new_buffer() @@ -1138,12 +1140,12 @@ def test_line_protocol_validation_on_tcp(self): symbols={'id': 'Hola'}, columns={'qty': 3.5}, at=qi.TimestampNanos(111222233333)) - exp = b'line_sender_buffer_tcp_v1,id=Hola qty' + _float_binary_bytes(3.5, True) + b' 111222233333\n' + exp = b'line_sender_buffer_tcp_v1,id=Hola qty=3.5 111222233333\n' self.assertEqual(bytes(buffer), exp) sender.flush(buffer) self.assertEqual(server.recv()[0] + b'\n', exp) - with Server() as server, self.builder('tcp', 'localhost', server.port, default_line_protocol_version='v2') as sender: + with Server() as server, self.builder('tcp', 'localhost', server.port, protocol_version='2') as sender: server.accept() self.assertEqual(server.recv(), []) buffer = sender.new_buffer() @@ -1157,7 +1159,7 @@ def test_line_protocol_validation_on_tcp(self): sender.flush(buffer) self.assertEqual(server.recv()[0] + b'\n', exp) - with Server() as server, self.builder('tcp', 'localhost', server.port, default_line_protocol_version='auto') as sender: + with Server() as server, self.builder('tcp', 'localhost', server.port, protocol_version='auto') as sender: server.accept() self.assertEqual(server.recv(), []) buffer = sender.new_buffer() @@ -1166,12 +1168,13 @@ def test_line_protocol_validation_on_tcp(self): symbols={'id': 'Hola'}, columns={'qty': 3.5}, at=qi.TimestampNanos(111222233333)) - exp = b'line_sender_buffer_tcp_v1,id=Hola qty' + _float_binary_bytes(3.5) + b' 111222233333\n' + exp = b'line_sender_buffer_tcp_v1,id=Hola qty=3.5 111222233333\n' self.assertEqual(bytes(buffer), exp) 
sender.flush(buffer) self.assertEqual(server.recv()[0] + b'\n', exp)\ def _test_array_basic(self, arr: np.ndarray): + # http with HttpServer() as server, self.builder('http', 'localhost', server.port) as sender: sender.row( 'array_test', @@ -1182,7 +1185,8 @@ def _test_array_basic(self, arr: np.ndarray): self.assertEqual(len(server.requests), 1) self.assertEqual(server.requests[0], exp) - with Server() as server, self.builder('tcp', 'localhost', server.port) as sender: + #tcp + with Server() as server, self.builder('tcp', 'localhost', server.port, protocol_version='2') as sender: server.accept() self.assertEqual(server.recv(), []) sender.row( @@ -1327,7 +1331,7 @@ def encode_int_or_off(v): 'auto_flush_rows': encode_int_or_off, 'auto_flush_bytes': encode_int_or_off, 'auto_flush_interval': encode_duration_or_off, - 'default_line_protocol_version': str, + 'protocol_version': str, 'init_buf_size': str, 'max_name_len': str, } diff --git a/test/test_dataframe.py b/test/test_dataframe.py index 19f7447f..c21e7272 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -33,8 +33,8 @@ fastparquet = None -def _dataframe(line_protocol_version: qi.LineProtocolVersion, *args, **kwargs): - buf = qi.Buffer(line_protocol_version = line_protocol_version) +def _dataframe(protocol_version: qi.ProtocolVersion, *args, **kwargs): + buf = qi.Buffer(protocol_version = protocol_version) buf.dataframe(*args, **kwargs) return bytes(buf) @@ -187,9 +187,9 @@ def test_basic(self): at=-1) self.assertEqual( buf, - b't1,A=a1,B=b1,C=b1,D=a1 E' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',F=1i 1520640000000000000\n' + - b't2,A=a2,D=a2 E' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',F=2i 1520726400000000000\n' + - b't1,A=a3,B=b3,C=b3,D=a3 E' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',F=3i 1520812800000000000\n') + b't1,A=a1,B=b1,C=b1,D=a1 E' + 
_float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',F=1i 1520640000000000000\n' + + b't2,A=a2,D=a2 E' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',F=2i 1520726400000000000\n' + + b't1,A=a3,B=b3,C=b3,D=a3 E' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',F=3i 1520812800000000000\n') def test_named_dataframe(self): df = pd.DataFrame({ @@ -408,12 +408,12 @@ def test_u64_numpy_col(self): b'tbl1 a=0i\n' + b'tbl1 a=9223372036854775807i\n') - buf = qi.Buffer(line_protocol_version=self.version) + buf = qi.Buffer(protocol_version=self.version) buf.dataframe(pd.DataFrame({'b': [.5, 1.0, 1.5]}), table_name='tbl2', at=qi.ServerTimestamp) exp1 = ( - b'tbl2 b' + _float_binary_bytes(0.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl2 b' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl2 b' + _float_binary_bytes(1.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + b'tbl2 b' + _float_binary_bytes(0.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl2 b' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl2 b' + _float_binary_bytes(1.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n') self.assertEqual( bytes(buf), exp1) @@ -460,14 +460,14 @@ def test_f32_numpy_col(self): buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(0.0, self.version == 
qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(3.4028234663852886e38, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(3.4028234663852886e38, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n') def test_f64_numpy_col(self): df = pd.DataFrame({'a': pd.Series([ @@ -481,14 +481,14 @@ def test_f64_numpy_col(self): buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + 
_float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(float('NAN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(float('NAN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n') def test_u8_arrow_col(self): df = pd.DataFrame({ @@ -675,14 +675,14 @@ def test_f32_arrow_col(self): buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="a"\n' + - b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="b"\n' + - b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + 
b',b="c"\n' + - b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="d"\n' + - b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="e"\n' + - b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="f"\n' + + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="a"\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="b"\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="c"\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="d"\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="e"\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="f"\n' + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. 
- b'tbl1 a' + _float_binary_bytes(3.4028234663852886e38, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="h"\n' + + b'tbl1 a' + _float_binary_bytes(3.4028234663852886e38, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="h"\n' + b'tbl1 b="i"\n') def test_f64_arrow_col(self): @@ -700,14 +700,14 @@ def test_f64_arrow_col(self): buf = _dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp) self.assertEqual( buf, - b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="a"\n' + - b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="b"\n' + - b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="c"\n' + - b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="d"\n' + - b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="e"\n' + - b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="f"\n' + + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="a"\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="b"\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="c"\n' + + b'tbl1 a' + _float_binary_bytes(0.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="d"\n' + + b'tbl1 a' + _float_binary_bytes(float('inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="e"\n' + + b'tbl1 a' + _float_binary_bytes(float('-inf'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="f"\n' + b'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. 
- b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b="h"\n' + + b'tbl1 a' + _float_binary_bytes(1.7976931348623157e308, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b="h"\n' + b'tbl1 b="i"\n') def test_bool_numpy_col(self): @@ -1311,13 +1311,13 @@ def test_pyobj_float_col(self): dtype='object'), 'b': [1, 2, 3, 4, 5, 6, 7]}), table_name='tbl1', at = qi.ServerTimestamp), - b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=1i\n' + - b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=2i\n' + - b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=3i\n' + + b'tbl1 a' + _float_binary_bytes(1.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b=1i\n' + + b'tbl1 a' + _float_binary_bytes(2.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b=2i\n' + + b'tbl1 a' + _float_binary_bytes(3.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b=3i\n' + b'tbl1 b=4i\n' + - b'tbl1 a' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=5i\n' + + b'tbl1 a' + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b=5i\n' + b'tbl1 b=6i\n' + - b'tbl1 a' + _float_binary_bytes(7.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',b=7i\n') + b'tbl1 a' + _float_binary_bytes(7.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',b=7i\n') with self.assertRaisesRegex( qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): @@ -1583,23 +1583,23 @@ def df_eq(exp_df, deser_df, exp_dtypes): df_eq(df, fp2fp_df, exp_dtypes) exp = ( - b'tbl1,s=a a=1i,b=10i,c' + _float_binary_bytes(0.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1,s=b a=2i,b=20i,c' + 
_float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1,s=a a=3i,b=30i,c' + _float_binary_bytes(2.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1,s=c a=4i,c' + _float_binary_bytes(3.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1,s=a a=5i,b=50i,c' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + b'tbl1,s=a a=1i,b=10i,c' + _float_binary_bytes(0.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1,s=b a=2i,b=20i,c' + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1,s=a a=3i,b=30i,c' + _float_binary_bytes(2.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1,s=c a=4i,c' + _float_binary_bytes(3.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1,s=a a=5i,b=50i,c' + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n') fallback_exp = ( - b'tbl1 s="a",a=1i,b' + _float_binary_bytes(10.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + - _float_binary_bytes(0.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 s="b",a=2i,b' + _float_binary_bytes(20.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + - _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 s="a",a=3i,b' + _float_binary_bytes(30.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + - _float_binary_bytes(2.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n' + - b'tbl1 s="c",a=4i,b' + _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + - _float_binary_bytes(3.5, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) 
+ b'\n' + - b'tbl1 s="a",a=5i,b' + _float_binary_bytes(50.0, self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b',c' + - _float_binary_bytes(float('NaN'), self.version == qi.LineProtocolVersion.LineProtocolVersionV1) + b'\n') + b'tbl1 s="a",a=1i,b' + _float_binary_bytes(10.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',c' + + _float_binary_bytes(0.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 s="b",a=2i,b' + _float_binary_bytes(20.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',c' + + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 s="a",a=3i,b' + _float_binary_bytes(30.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',c' + + _float_binary_bytes(2.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 s="c",a=4i,b' + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',c' + + _float_binary_bytes(3.5, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n' + + b'tbl1 s="a",a=5i,b' + _float_binary_bytes(50.0, self.version == qi.ProtocolVersion.ProtocolVersionV1) + b',c' + + _float_binary_bytes(float('NaN'), self.version == qi.ProtocolVersion.ProtocolVersionV1) + b'\n') self.assertEqual(_dataframe(self.version, df, table_name='tbl1', at=qi.ServerTimestamp), exp) self.assertEqual(_dataframe(self.version, pa2pa_df, table_name='tbl1', at=qi.ServerTimestamp), exp) @@ -1611,7 +1611,7 @@ def test_f64_np_array(self): df = pd.DataFrame({ 'a': [np.array([1.0], np.float64), np.array([2.0], np.float64), np.array([3.0], np.float64)]}) - if self.version == qi.LineProtocolVersion.LineProtocolVersionV1: + if self.version == qi.ProtocolVersion.ProtocolVersionV1: with self.assertRaisesRegex( qi.IngressError, "line protocol version v1 does not support array datatype"): @@ -1624,13 +1624,13 @@ def test_f64_np_array(self): b'tbl1 a=' + _array_binary_bytes(np.array([2.0], 
np.float64)) + b'\n' + b'tbl1 a=' + _array_binary_bytes(np.array([3.0], np.float64)) + b'\n') -class TestPandasLineProtocolVersionV1(TestPandasBase.TestPandas): +class TestPandasProtocolVersionV1(TestPandasBase.TestPandas): name = 'init' - version = qi.LineProtocolVersion.LineProtocolVersionV1 + version = qi.ProtocolVersion.ProtocolVersionV1 -class TestPandasLineProtocolVersionV2(TestPandasBase.TestPandas): +class TestPandasProtocolVersionV2(TestPandasBase.TestPandas): name = 'init' - version = qi.LineProtocolVersion.LineProtocolVersionV2 + version = qi.ProtocolVersion.ProtocolVersionV2 if __name__ == '__main__': if os.environ.get('TEST_QUESTDB_PROFILE') == '1':