Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mypy/typeshed/stubs/librt/librt/strings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ def write_f64_le(b: BytesWriter, n: float, /) -> None: ...
def write_f64_be(b: BytesWriter, n: float, /) -> None: ...
def read_f64_le(b: bytes, index: i64, /) -> float: ...
def read_f64_be(b: bytes, index: i64, /) -> float: ...

# Codepoint classification helpers operating on i32 codepoints (typically
# obtained via ord(s[i])). Negative inputs return False.
#
# isspace(c) tests whether codepoint c is Unicode whitespace. The argument is
# positional-only. When called from interpreted code, an int outside the i32
# range raises OverflowError.
def isspace(c: i32, /) -> bool: ...
1 change: 1 addition & 0 deletions mypyc/ir/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,5 @@ def get_header(self) -> str:
# Extra C source files that primitive ops can name in their `dependencies=`
# list (e.g. librt.strings.isspace lists CODEPOINT_EXTRA_OPS).
# NOTE(review): presumably a listed file is only built into the generated
# extension when some op that depends on it is used -- confirm against SourceDep.
STRING_WRITER_EXTRA_OPS: Final = SourceDep("stringwriter_extra_ops.c")
BYTEARRAY_EXTRA_OPS: Final = SourceDep("bytearray_extra_ops.c")
STR_EXTRA_OPS: Final = SourceDep("str_extra_ops.c")
CODEPOINT_EXTRA_OPS: Final = SourceDep("codepoint_extra_ops.c")
VECS_EXTRA_OPS: Final = SourceDep("vecs_extra_ops.c")
8 changes: 8 additions & 0 deletions mypyc/lib-rt/codepoint_extra_ops.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "codepoint_extra_ops.h"

// Out-of-line bodies for codepoint helpers that are too large to inline.
// The classification helpers and the ASCII fast paths for case conversion
// stay inline in codepoint_extra_ops.h; this file holds the slow paths
// that round-trip through PyUnicode_FromOrdinal and CPython's Unicode
// machinery. Currently empty; populated as later commits add
// isidentifier, toupper, and tolower.
16 changes: 16 additions & 0 deletions mypyc/lib-rt/codepoint_extra_ops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef MYPYC_CODEPOINT_EXTRA_OPS_H
#define MYPYC_CODEPOINT_EXTRA_OPS_H

#include <Python.h>
#include <stdbool.h>
#include <stdint.h>

// Codepoint helpers for librt.strings.
// Inputs are signed int32_t for compatibility with mypyc's i32 type.
// Negative values are treated as non-codepoints and return false.

// Report whether `c` is a Unicode whitespace codepoint.
// Negative values are not codepoints and always yield false.
static inline bool LibRTStrings_IsSpace(int32_t c) {
    if (c < 0) {
        return false;
    }
    return Py_UNICODE_ISSPACE((Py_UCS4)c) != 0;
}

#endif // MYPYC_CODEPOINT_EXTRA_OPS_H
38 changes: 38 additions & 0 deletions mypyc/lib-rt/strings/librt_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <Python.h>
#include <stdint.h>
#include "CPy.h"
#include "codepoint_extra_ops.h"
#include "librt_strings.h"

#define CPY_BOOL_ERROR 2
Expand Down Expand Up @@ -1153,6 +1154,40 @@ read_f64_be(PyObject *module, PyObject *const *args, size_t nargs) {
return PyFloat_FromDouble(CPyBytes_ReadF64BEUnsafe(data + index));
}

// Codepoint classification helpers exposed to interpreted callers.
// The C-side names are prefixed `cp_` to avoid colliding with libc's
// <ctype.h> isspace / isdigit / etc. Compiled callers go through the
// LibRTStrings_* static inlines in codepoint_extra_ops.h instead.
//
// All wrappers parse a single int argument as i32 (codepoint) and
// dispatch to the corresponding LibRTStrings_* function. The parse
// step accepts any int but rejects values outside the i32 range with
// OverflowError, matching the input domain of the compiled fast path.

// CP_PARSE_I32(arg, var): declare `int32_t var` in the enclosing scope and
// fill it from the Python int object `arg`.
//
// NOTE: the expansion contains `return NULL` statements, so it may only be
// used inside a function that returns a pointer (e.g. PyObject *). The
// enclosing function returns NULL when:
//   - PyLong_AsLongAndOverflow() yields -1 with an exception pending, or
//   - the value overflows a C long or lies outside [INT32_MIN, INT32_MAX];
//     in that case OverflowError("codepoint out of i32 range") is set first.
//
// The expansion begins with a declaration followed by a do/while(0), so it is
// not a single statement: terminate it with `;` and do not use it as the body
// of an unbraced if/else.
#define CP_PARSE_I32(arg, var) \
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could this be a regular function instead of a macro? i think arg will always be PyObject * and var is just a variable name, is there point in even having that as an argument?

int32_t var; \
do { \
int _overflow; \
long _c = PyLong_AsLongAndOverflow((arg), &_overflow); \
if (_c == -1 && PyErr_Occurred()) \
return NULL; \
if (_overflow != 0 || _c < INT32_MIN || _c > INT32_MAX) { \
PyErr_SetString(PyExc_OverflowError, \
"codepoint out of i32 range"); \
return NULL; \
} \
(var) = (int32_t)_c; \
} while (0)

// DEFINE_CP_BOOL_WRAPPER(name, fn): define a static function
// `PyObject *cp_<name>(PyObject *module, PyObject *arg)` that parses `arg`
// into an int32_t named `c` with CP_PARSE_I32 and returns the result of
// `fn(c)` as a Python bool via PyBool_FromLong. If parsing fails,
// CP_PARSE_I32 makes the wrapper return NULL, so fn is never called.
#define DEFINE_CP_BOOL_WRAPPER(name, fn) \
static PyObject* \
cp_##name(PyObject *module, PyObject *arg) { \
CP_PARSE_I32(arg, c); \
return PyBool_FromLong(fn(c)); \
}

// Defines cp_isspace, which the module method table exposes as "isspace".
DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace)

static PyMethodDef librt_strings_module_methods[] = {
{"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL,
PyDoc_STR("Write a 16-bit signed integer to BytesWriter in little-endian format")
Expand Down Expand Up @@ -1214,6 +1249,9 @@ static PyMethodDef librt_strings_module_methods[] = {
{"read_f64_be", (PyCFunction) read_f64_be, METH_FASTCALL,
PyDoc_STR("Read a 64-bit float from bytes in big-endian format")
},
{"isspace", cp_isspace, METH_O,
PyDoc_STR("Test whether a codepoint (i32) is Unicode whitespace.")
},
{NULL, NULL, 0, NULL}
};

Expand Down
19 changes: 18 additions & 1 deletion mypyc/primitives/librt_strings_ops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from mypyc.ir.deps import BYTES_WRITER_EXTRA_OPS, LIBRT_STRINGS, STRING_WRITER_EXTRA_OPS
from mypyc.ir.deps import (
BYTES_WRITER_EXTRA_OPS,
CODEPOINT_EXTRA_OPS,
LIBRT_STRINGS,
STRING_WRITER_EXTRA_OPS,
)
from mypyc.ir.ops import ERR_MAGIC, ERR_MAGIC_OVERLAPPING, ERR_NEVER
from mypyc.ir.rtypes import (
bool_rprimitive,
Expand Down Expand Up @@ -387,3 +392,15 @@
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, STRING_WRITER_EXTRA_OPS],
)


# Codepoint classification helpers operating on i32 codepoints
# (typically obtained via ord(s[i])). Negative inputs return False.
#
# librt.strings.isspace takes an i32 and yields a bool by calling the C helper
# LibRTStrings_IsSpace.
function_op(
name="librt.strings.isspace",
arg_types=[int32_rprimitive],
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsSpace",
# The C helper only returns true/false; it has no error return value.
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
)
14 changes: 14 additions & 0 deletions mypyc/test-data/irbuild-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,17 @@ L1:
L2:
r3 = CPyStringWriter_GetItem(s, r0)
return r3

[case testLibrtStringsIsSpaceIR]
from librt.strings import isspace
from mypy_extensions import i32

# Minimal case: an i32 argument passed straight through is expected to lower
# to a single call to the C primitive LibRTStrings_IsSpace (see [out]).
def is_ws(c: i32) -> bool:
    return isspace(c)
Comment on lines +278 to +279
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also test something more complicated, like isspace(ord(s[i])) where s is a string.

[out]
def is_ws(c):
c :: i32
r0 :: bool
L0:
r0 = LibRTStrings_IsSpace(c)
return r0
17 changes: 17 additions & 0 deletions mypyc/test-data/run-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -1439,3 +1439,20 @@ def test_new_without_init_is_usable() -> None:
assert sw.getvalue() == ""
sw.write("hello")
assert sw.getvalue() == "hello"

[case testLibrtStringsIsSpace_librt]
from typing import Any
from mypy_extensions import i32
from librt.strings import isspace


def test_isspace() -> None:
    """Check librt.strings.isspace against str.isspace() for every codepoint."""
    # Negative values are not codepoints and must be reported as non-space.
    assert not isspace(i32(-1))
    assert not isspace(i32(-113))
    # Compare the codepoint primitive with str.isspace() over the whole
    # Unicode range, both for the raw integer and for the ord(chr(...))
    # round-trip. Calling .isspace() through an Any-typed value forces
    # generic dispatch on the str side.
    for codepoint in range(0x110000):
        char = chr(codepoint)
        dynamic: Any = char
        expected = dynamic.isspace()
        assert isspace(ord(char)) == expected
        assert isspace(codepoint) == expected
Loading