Skip to content

Commit 30c5488

Browse files
committed
PyCLIF behavior changes: Safer Python-to-C++ std::vector, std::set, std::map conversions.
**Please request a rollback only as a last resort**, to buy time for working out a forward fix. Forward fixes are almost certainly trivial: Based on the Python traceback, insert an explicit conversion to `tuple()`, `set()`, or `dict()` (e.g. cl/538602196), or change a Python `set` or `list` literal to the correct type (e.g. cl/538563104). Motivations for the behavior changes: * Improves safety / prevents accidents: * For C++ `std::set` (and `std::unordered_set`): The potentially reducing conversion from a Python sequence (e.g. Python `list` or `tuple`) to a C++ set must be explicit. * For C++ `std::vector` (and `std::array`): In very rare cases (see b/284333274#comment11) the conversion from a Python iterable must be explicit. * For C++ `std::map` (and `std::unordered_map`): The conditions added in this CL (`clif::PyObjectTypeIsConvertibleToStdMap()`) did not break any existing unit tests. * Improves readability: Python literals to be passed as C++ sets must be set literals. * Note for completeness: A Python set can still be passed to a C++ `std::vector`. The rationale is that this conversion is not reducing. Implicit conversions of this kind are also fairly commonly used in google3, therefore enforcing explicit conversions has an unfavorable cost : benefit ratio. * Original trigger for this work: Converge pybind11 & PyCLIF behavior (go/pyclif_pybind11_fusion). * Note that pybind11 is very far on the strict side of the spectrum. See also pybind/pybind11#4686. * PyCLIF was originally extremely far on the permissive side of the spectrum. This CL is already the 2nd cleanup step: prior to cl/525187106 a conversion from any zero-length iterator (e.g. the `{}` empty dict literal) to a C++ `std::vector` or `std::set` was possible. This CL was started from cl/535028028, but the changes under google3/third_party/absl/python were backed out, the change for `std::array` in stltypes.h was added. OSS build tested interactively with copybara to local directory & cmake commands in Ubuntu 20.04 / Python 3.9 docker container. PiperOrigin-RevId: 538823846
1 parent c6974a2 commit 30c5488

File tree

7 files changed

+123
-27
lines changed

7 files changed

+123
-27
lines changed

clif/python/runtime.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "clif/python/runtime.h"
1616

17+
#include <initializer_list>
1718
#include <system_error> // NOLINT(build/c++11)
1819

1920
#include "clif/python/pickle_support.h"
@@ -384,4 +385,42 @@ PyObject* ReduceExImpl(PyObject* self, PyObject* args, PyObject* kw) {
384385
return ReduceExCore(self, protocol);
385386
}
386387

388+
namespace {
389+
390+
bool PyObjectIsInstanceWithOneOfTpNames(
391+
PyObject* obj, std::initializer_list<const char*> tp_names) {
392+
if (PyType_Check(obj)) {
393+
return false;
394+
}
395+
const char* obj_tp_name = Py_TYPE(obj)->tp_name;
396+
for (const auto* tp_name : tp_names) {
397+
if (strcmp(obj_tp_name, tp_name) == 0) {
398+
return true;
399+
}
400+
}
401+
return false;
402+
}
403+
404+
} // namespace
405+
406+
bool PyObjectTypeIsConvertibleToStdVector(PyObject* obj) {
407+
if (PySequence_Check(obj) != 0) {
408+
return !PyUnicode_Check(obj) && !PyBytes_Check(obj);
409+
}
410+
return (PyGen_Check(obj) != 0) || (PyAnySet_Check(obj) != 0) ||
411+
PyObjectIsInstanceWithOneOfTpNames(
412+
obj, {"dict_keys", "dict_values", "dict_items", "map", "zip"});
413+
}
414+
415+
bool PyObjectTypeIsConvertibleToStdSet(PyObject* obj) {
416+
return (PyAnySet_Check(obj) != 0) ||
417+
PyObjectIsInstanceWithOneOfTpNames(obj, {"dict_keys"});
418+
}
419+
420+
bool PyObjectTypeIsConvertibleToStdMap(PyObject* obj) {
421+
return (PyDict_Check(obj) != 0) ||
422+
((PyMapping_Check(obj) != 0) &&
423+
(PyObject_HasAttrString(obj, "items") != 0));
424+
}
425+
387426
} // namespace clif

clif/python/runtime.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,10 @@ bool ensure_no_args_and_kw_args(const char* func, PyObject* args, PyObject* kw);
185185
// https://docs.python.org/3/library/pickle.html#object.__reduce_ex__
186186
PyObject* ReduceExImpl(PyObject* self, PyObject* args, PyObject* kw);
187187

188+
bool PyObjectTypeIsConvertibleToStdVector(PyObject* obj);
189+
bool PyObjectTypeIsConvertibleToStdSet(PyObject* obj);
190+
bool PyObjectTypeIsConvertibleToStdMap(PyObject* obj);
191+
188192
} // namespace clif
189193

190194
#endif // CLIF_PYTHON_RUNTIME_H_

clif/python/stltypes.h

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ typename std::enable_if<(I < sizeof...(U)), bool>::type //
222222
PyObjAsVariantTryAll(PyObject* py, ::std::variant<U...>* v) {
223223
{
224224
bool success = PyObjAsVariantAt<I>(py, v);
225-
DCHECK(success || PyErr_Occurred() != nullptr);
226225
if (success) {
227226
return true;
228227
}
@@ -936,10 +935,13 @@ bool IterToCont(PyObject* py, Inserter add, bool accept_dict = false) {
936935
// to a C++ container via functor add.
937936
template <typename T, typename U, typename F>
938937
bool ItemsToMap(PyObject* py, F add) {
939-
py = PyObject_CallMethod(py, "items", nullptr);
940-
if (py == nullptr) return false;
941-
bool ok = py::IterToCont<std::pair<T, U>>(py, add, true);
942-
Py_DECREF(py);
938+
if (!PyObjectTypeIsConvertibleToStdMap(py)) {
939+
return false;
940+
}
941+
PyObject* items = PyObject_CallMethod(py, "items", nullptr);
942+
if (items == nullptr) return false;
943+
bool ok = py::IterToCont<std::pair<T, U>>(items, add, true);
944+
Py_DECREF(items);
943945
return ok;
944946
}
945947
} // namespace py
@@ -948,6 +950,9 @@ bool ItemsToMap(PyObject* py, F add) {
948950
template <typename T, typename... Args>
949951
bool Clif_PyObjAs(PyObject* py, std::vector<T, Args...>* c) {
950952
CHECK(c != nullptr);
953+
if (!PyObjectTypeIsConvertibleToStdVector(py)) {
954+
return false;
955+
}
951956
c->clear();
952957
return py::IterToCont<T>(py, [&c](T&& i) { // NOLINT: build/c++11
953958
c->push_back(std::move(i));
@@ -957,7 +962,9 @@ bool Clif_PyObjAs(PyObject* py, std::vector<T, Args...>* c) {
957962
template <typename T, std::size_t N>
958963
bool Clif_PyObjAs(PyObject* py, std::array<T, N>* c) {
959964
CHECK(c != nullptr);
960-
965+
if (!PyObjectTypeIsConvertibleToStdVector(py)) {
966+
return false;
967+
}
961968
int index = 0;
962969
bool rval = py::IterToCont<T>(py, [&c, &index](T&& i) {
963970
if (index < N) {
@@ -976,6 +983,9 @@ bool Clif_PyObjAs(PyObject* py, std::array<T, N>* c) {
976983
template <typename T, typename... Args>
977984
bool Clif_PyObjAs(PyObject* py, std::unordered_set<T, Args...>* c) {
978985
CHECK(c != nullptr);
986+
if (!PyObjectTypeIsConvertibleToStdSet(py)) {
987+
return false;
988+
}
979989
c->clear();
980990
return py::IterToCont<T>(py, [&c](T&& i) { // NOLINT: build/c++11
981991
c->insert(std::move(i));
@@ -984,6 +994,9 @@ bool Clif_PyObjAs(PyObject* py, std::unordered_set<T, Args...>* c) {
984994
template <typename T, typename... Args>
985995
bool Clif_PyObjAs(PyObject* py, std::set<T, Args...>* c) {
986996
CHECK(c != nullptr);
997+
if (!PyObjectTypeIsConvertibleToStdSet(py)) {
998+
return false;
999+
}
9871000
c->clear();
9881001
return py::IterToCont<T>(py, [&c](T&& i) { // NOLINT: build/c++11
9891002
c->insert(std::move(i));

clif/testing/python/lambda_expressions_test.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,31 +149,36 @@ def test_extended_ctor_accept_iterable_as_vector(self, iterable):
149149
self.assertCountEqual(iterable, obj.value)
150150

151151
def test_accept_iterable_as_set(self, iterable):
152-
self.assertEqual(lambda_expressions.takes_set(iterable), 3)
153-
self.assertEqual(lambda_expressions.takes_unordered_set(iterable), 3)
154-
self.assertEqual(lambda_expressions.takes_set((i for i in iterable)), 3)
155-
self.assertEqual(lambda_expressions.takes_unordered_set(
156-
(i for i in iterable)), 3)
152+
# Explicit set() added with cl/536221955.
153+
self.assertEqual(lambda_expressions.takes_set(set(iterable)), 3)
154+
self.assertEqual(lambda_expressions.takes_unordered_set(set(iterable)), 3)
155+
self.assertEqual(lambda_expressions.takes_set(set(i for i in iterable)), 3)
156+
self.assertEqual(
157+
lambda_expressions.takes_unordered_set(set(i for i in iterable)), 3
158+
)
157159

158160
def test_ctor_accept_iterable_as_set(self, iterable):
159-
obj = lambda_expressions.CtorTakesSet(iterable)
161+
# Explicit set() added with cl/536221955.
162+
obj = lambda_expressions.CtorTakesSet(set(iterable))
160163
self.assertCountEqual(iterable, obj.value)
161-
obj = lambda_expressions.CtorTakesUnorderedSet(iterable)
164+
obj = lambda_expressions.CtorTakesUnorderedSet(set(iterable))
162165
self.assertCountEqual(iterable, obj.value)
163-
obj = lambda_expressions.CtorTakesSet((i for i in iterable))
166+
obj = lambda_expressions.CtorTakesSet(set(i for i in iterable))
164167
self.assertCountEqual(iterable, obj.value)
165-
obj = lambda_expressions.CtorTakesUnorderedSet((i for i in iterable))
168+
obj = lambda_expressions.CtorTakesUnorderedSet(set(i for i in iterable))
166169
self.assertCountEqual(iterable, obj.value)
167170

168171
def test_extended_ctor_accept_iterable_as_set(self, iterable):
169-
obj = lambda_expressions.ExtendedCtorTakesSet(iterable)
172+
# Explicit set() added with cl/536221955.
173+
obj = lambda_expressions.ExtendedCtorTakesSet(set(iterable))
170174
self.assertCountEqual(iterable, obj.value)
171-
obj = lambda_expressions.ExtendedCtorTakesUnorderedSet(iterable)
175+
obj = lambda_expressions.ExtendedCtorTakesUnorderedSet(set(iterable))
172176
self.assertCountEqual(iterable, obj.value)
173-
obj = lambda_expressions.ExtendedCtorTakesSet((i for i in iterable))
177+
obj = lambda_expressions.ExtendedCtorTakesSet(set(i for i in iterable))
174178
self.assertCountEqual(iterable, obj.value)
175179
obj = lambda_expressions.ExtendedCtorTakesUnorderedSet(
176-
(i for i in iterable))
180+
set(i for i in iterable)
181+
)
177182
self.assertCountEqual(iterable, obj.value)
178183

179184

clif/testing/python/std_containers.clif

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ use `std::string` as str
1616

1717
from "clif/testing/std_containers.h":
1818
def PassVectorInt(v: list<int>) -> int
19+
def PassArrayInt2(a: `std::array<int, 2>` as list) -> int
20+
def PassVectorPairInt(v: list<tuple<int, int>>) -> int
1921
def PassSetInt(v: `std::set` as set<int>) -> int
2022
def Mul(v: list<int>, m: int) -> list<int>
2123
def Div(v: `std::array<int, 2>` as list, m: int) -> `std::array<int, 2>` as list

clif/testing/python/std_containers_test.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,44 @@
1515
"""Tests for clif.testing.python.std_containers."""
1616

1717
from absl.testing import absltest
18+
from absl.testing import parameterized
1819

1920
from clif.testing.python import std_containers
2021

2122

22-
class StdContainersTest(absltest.TestCase):
23+
class StdContainersTest(parameterized.TestCase):
2324

24-
def testPassVectorInt(self):
25-
self.assertEqual(std_containers.PassVectorInt([7, 13]), 140)
26-
self.assertEqual(std_containers.PassVectorInt(set([9, 12, 9])), 142)
25+
@parameterized.parameters(
26+
(std_containers.PassVectorInt, 0), (std_containers.PassArrayInt2, 1)
27+
)
28+
def testPassVectorInt(self, fn, offset):
29+
self.assertEqual(fn([7, 13]), 140 + offset)
30+
self.assertEqual(fn({6, 2}), 116 + offset)
31+
self.assertEqual(fn({'x': 8, 'y': 11}.values()), 138 + offset)
32+
self.assertEqual(fn({3: None, 9: None}.keys()), 124 + offset)
33+
self.assertEqual(fn(i for i in [4, 17]), 142 + offset)
34+
self.assertEqual(fn(map(lambda i: i * 3, [8, 7])), 190 + offset)
2735
with self.assertRaises(TypeError):
28-
std_containers.PassVectorInt({})
36+
fn({'x': 0, 'y': 1})
37+
with self.assertRaises(TypeError):
38+
fn({})
39+
40+
def testPassVectorPairInt(self):
41+
fn = std_containers.PassVectorPairInt
42+
self.assertEqual(fn(zip([5, 17], [13, 9])), 2222)
2943

3044
def testPassSetInt(self):
31-
self.assertEqual(std_containers.PassSetInt(set([3, 15, 3])), 254)
32-
self.assertEqual(std_containers.PassSetInt([5, 17, 5]), 266)
45+
fn = std_containers.PassSetInt
46+
self.assertEqual(fn({3, 15}), 254)
47+
self.assertEqual(fn({5: None, 12: None}.keys()), 251)
48+
with self.assertRaises(TypeError):
49+
fn([])
50+
with self.assertRaises(TypeError):
51+
fn({})
52+
with self.assertRaises(TypeError):
53+
fn({}.values())
3354
with self.assertRaises(TypeError):
34-
std_containers.PassSetInt({})
55+
fn(i for i in [])
3556

3657
def testVector(self):
3758
self.assertEqual(std_containers.Mul([1, 2, 3], 2), [2, 4, 6])

clif/testing/std_containers.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ inline int PassVectorInt(const std::vector<int>& v) {
3535
return zum;
3636
}
3737

38+
inline int PassArrayInt2(const std::array<int, 2>& a) {
39+
return PassVectorInt(std::vector<int>(a.begin(), a.end())) + 1;
40+
}
41+
42+
inline int PassVectorPairInt(const std::vector<std::pair<int, int>>& v) {
43+
int zum = 0;
44+
for (const auto& ij : v) {
45+
zum += ij.first * 100 + ij.second;
46+
}
47+
return zum;
48+
}
49+
3850
inline int PassSetInt(const std::set<int>& s) {
3951
int zum = 200;
4052
for (const int i : s) {

0 commit comments

Comments
 (0)