Skip to content

Commit 3246a66

Browse files
authored
Merge pull request #4 from yilun11/CVE-2020-13091
CVE 2020 13091
2 parents 874a4ca + 3247cd9 commit 3246a66

File tree

7 files changed

+179
-15
lines changed

7 files changed

+179
-15
lines changed

INFO

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
### tests
2+
3+
updated /tests/io/test_pickle.py
4+
compatibility tests for older pandas versions have not been written yet
5+
6+
###
7+
8+
# call this file pickle_config.yml
9+
# this is meant to configure pickle deserialization, which can be a security risk
10+
11+
# mode can be permit, deny, off
12+
13+
# permit means pickle is only allowed to load permitted libraries (safer option)
14+
# deny means pickle is not allowed to load denied libraries (less safe option)
15+
# off means no security and should only be used on safe pickles (least safe option)
16+
mode: permit
17+
18+
# here you would list packages and classes that can be used
19+
# this is how the Python documentation recommends restricting pickle globals
20+
# see https://docs.python.org/3/library/pickle.html#restricting-globals
21+
permit:
22+
builtins: ['range', 'complex', 'set', 'frozenset', 'slice']
23+
24+
# here you would list packages and classes that cannot be used
25+
# all others will be allowed if running on deny mode
26+
# this is not as safe as the permit method
27+
# see https://pythonmana.com/2022/143/202205231222219535.html
28+
deny:
29+
builtins: ['eval', 'exec', 'execfile', 'compile', 'open', 'input', '__import__', 'exit']
30+
os: ['system']

pandas/compat/pickle_compat.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
import numpy as np
1717

18+
from pandas._config import get_option
19+
1820
from pandas._libs.arrays import NDArrayBacked
1921
from pandas._libs.tslibs import BaseOffset
2022

@@ -196,14 +198,28 @@ def __new__(cls) -> DataFrame: # type: ignore[misc]
196198

197199
# our Unpickler sub-class to override methods and some dispatcher
198200
# functions for compat and uses a non-public class of the pickle module.
201+
# checks modules against the permit/deny list and raises an error if the module is forbidden.
199202

200203

201204
class Unpickler(pkl._Unpickler):
    """
    Compat unpickler that also enforces the ``pickler.unpickle.mode`` policy.
    """

    def find_class(self, module, name):
        """
        Resolve a pickled global, refusing anything the active policy rejects.

        Parameters
        ----------
        module : str
            Module recorded in the pickle stream.
        name : str
            Attribute name recorded in the pickle stream.

        Returns
        -------
        object
            Whatever the superclass resolves for the (possibly remapped)
            location.

        Raises
        ------
        pkl.UnpicklingError
            If the active mode does not allow the (possibly remapped) location.
        """
        # override superclass
        # Legacy locations are translated first; the policy below is applied
        # to the translated (module, name) pair, not the one in the stream.
        requested = (module, name)
        module, name = _class_locations_map.get(requested, requested)
        target = (module, name)

        mode = get_option("pickler.unpickle.mode")
        if mode == "off":
            # No restrictions at all.
            allowed = True
        elif mode == "permit":
            # Only allow safe modules and classes. Tuples defined in config.
            allowed = target in get_option("pickler.safe.tuples")
        elif mode == "deny":
            # Do not allow unsafe modules and classes.
            allowed = target not in get_option("pickler.unsafe.tuples")
        else:
            allowed = False

        if not allowed:
            # Forbid everything else.
            raise pkl.UnpicklingError(f"global '{module} . {name}' is forbidden")
        return super().find_class(module, name)
207223

208224

209225
Unpickler.dispatch = copy.copy(Unpickler.dispatch)

pandas/core/config_init.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from typing import Callable
1717
import warnings
1818

19+
import yaml
20+
1921
import pandas._config.config as cf
2022
from pandas._config.config import (
2123
is_bool,
@@ -970,3 +972,78 @@ def register_converter_cb(key) -> None:
970972
styler_environment,
971973
validator=is_instance_factory([type(None), str]),
972974
)
975+
976+
# ------
977+
# Pickler
978+
# ------
979+
980+
pickler_unpickle_mode = """
: str
Determine which mode to use in {"off", "permit", "deny"}.
"""

pickler_safe_tuples = """
: array
Used when pickler.unpickle.mode is "permit"
Array of safe tuples, e.g. [("builtins", "range"), ("builtins", "complex")]
"""

pickler_unsafe_tuples = """
: array
Used when pickler.unpickle.mode is "deny"
Array of unsafe tuples, e.g. [("os", "system"), ("joblib", "load")]
"""

# Location of the config file. The PANDAS_UNPICKLE_SECURE environment variable
# overrides the default file name, which is looked up as a relative path.
str_loc = os.environ.get("PANDAS_UNPICKLE_SECURE", "pickle_config.yml")
safe_tuples = []
unsafe_tuples = []

if os.path.exists(str_loc):
    # Use a context manager so the file handle is closed deterministically
    # instead of being leaked at import time.
    with open(str_loc) as config_file:
        pickle_config = yaml.load(config_file, Loader=yaml.SafeLoader)

    # Flatten {module: [name, ...]} into (module, name) tuples for the list
    # that matches the configured mode; "off" needs neither list.
    if pickle_config["mode"] == "permit":
        safe_tuples.extend(
            (module, name)
            for module, names in pickle_config["permit"].items()
            for name in names
        )
    elif pickle_config["mode"] == "deny":
        unsafe_tuples.extend(
            (module, name)
            for module, names in pickle_config["deny"].items()
            for name in names
        )
else:
    # No config file: fall back to "deny" mode with a built-in deny list.
    pickle_config = {"mode": "deny"}
    # see deny list example at https://pythonmana.com/2022/143/202205231222219535.html
    unsafe_tuples = [
        ("os", "system"),
        # os.system is pickled under the platform module that implements it,
        # so both the POSIX and the Windows spellings must be listed.
        ("posix", "system"),
        ("nt", "system"),
        ("builtins", "eval"),
        ("builtins", "exec"),
        ("builtins", "execfile"),
        ("builtins", "compile"),
        ("builtins", "open"),
        ("builtins", "import"),
        ("builtins", "__import__"),
        ("builtins", "exit"),
    ]
1029+
1030+
# Register the unpickling-security options under the "pickler." prefix.
with cf.config_prefix("pickler"):
    # The default mode is whatever the config-loading code above decided on
    # (taken from the config file when one was found).
    cf.register_option(
        "unpickle.mode",
        pickle_config["mode"],
        pickler_unpickle_mode,
        validator=is_one_of_factory(["off", "permit", "deny"]),
    )
    # (module, name) pairs consulted in "permit" mode.
    cf.register_option("safe.tuples", safe_tuples, pickler_safe_tuples)
    # (module, name) pairs consulted in "deny" mode.
    cf.register_option("unsafe.tuples", unsafe_tuples, pickler_unsafe_tuples)

pandas/io/pickle.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from typing import Any
66
import warnings
77

8+
from pandas._config import get_option
9+
810
from pandas._typing import (
911
CompressionOptions,
1012
FilePath,
@@ -101,15 +103,8 @@ def to_pickle(
101103
is_text=False,
102104
storage_options=storage_options,
103105
) as handles:
104-
if handles.compression["method"] in ("bz2", "xz") and protocol >= 5:
105-
# some weird TypeError GH#39002 with pickle 5: fallback to letting
106-
# pickle create the entire object and then write it to the buffer.
107-
# "zip" would also be here if pandas.io.common._BytesZipFile
108-
# wouldn't buffer write calls
109-
handles.handle.write(pickle.dumps(obj, protocol=protocol))
110-
else:
111-
# letting pickle write directly to the buffer is more memory-efficient
112-
pickle.dump(obj, handles.handle, protocol=protocol)
106+
# letting pickle write directly to the buffer is more memory-efficient
107+
pickle.dump(obj, handles.handle, protocol=protocol)
113108

114109

115110
@doc(
@@ -122,12 +117,15 @@ def read_pickle(
122117
storage_options: StorageOptions = None,
123118
):
124119
"""
125-
Load pickled pandas object (or any object) from file.
120+
Load pickled pandas object (or any object) from file. Which globals may be
121+
loaded is governed by the ``pickler.unpickle.mode`` option ("deny" mode with
122+
a built-in deny list when no config file is found). See INFO file for customizing the security settings.
126123
127124
.. warning::
128125
129126
Loading pickled data received from untrusted sources can be
130-
unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
127+
unsafe if not using the default security settings.
128+
See `here <https://docs.python.org/3/library/pickle.html>`__.
131129
132130
Parameters
133131
----------
@@ -186,6 +184,27 @@ def read_pickle(
186184
3 3 8
187185
4 4 9
188186
"""
187+
188+
class RestrictedUnpickler(pickle.Unpickler):
    """
    Unpickler that resolves globals only when the configured policy allows it.
    """

    def find_class(self, module, name):
        """
        Resolve ``module.name`` or raise if the active mode forbids it.

        Raises
        ------
        pickle.UnpicklingError
            If the (module, name) pair is rejected by the
            ``pickler.unpickle.mode`` policy.
        """
        target = (module, name)
        mode = get_option("pickler.unpickle.mode")

        if mode == "off":
            # No restrictions at all.
            allowed = True
        elif mode == "permit":
            # Only allow safe modules and classes. Tuples defined in config.
            allowed = target in get_option("pickler.safe.tuples")
        elif mode == "deny":
            # Do not allow unsafe modules and classes.
            allowed = target not in get_option("pickler.unsafe.tuples")
        else:
            allowed = False

        if not allowed:
            # Forbid everything else.
            raise pickle.UnpicklingError(f"global '{module} . {name}' is forbidden")
        return super().find_class(module, name)
207+
189208
excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
190209
with get_handle(
191210
filepath_or_buffer,
@@ -205,7 +224,7 @@ def read_pickle(
205224
with warnings.catch_warnings(record=True):
206225
# We want to silence any warnings about, e.g. moved modules.
207226
warnings.simplefilter("ignore", Warning)
208-
return pickle.load(handles.handle)
227+
return RestrictedUnpickler.load(handles.handle)
209228
except excs_to_catch:
210229
# e.g.
211230
# "No module named 'pandas.core.sparse.series'"
44 Bytes
Binary file not shown.

pandas/tests/io/test_clipboard.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,5 +397,10 @@ def test_round_trip_valid_encodings(self, enc, df):
397397
)
398398
def test_raw_roundtrip(self, data):
    # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
    # adding coverage for when not implemented
    try:
        clipboard_set(data)
        round_tripped = clipboard_get()
        assert data == round_tripped
    except PyperclipException:
        # The clipboard call failed (presumably no clipboard mechanism is
        # available); the failure must then be the not-implemented error.
        expected_msg = r".*not-implemented-error.*"
        with pytest.raises(PyperclipException, match=expected_msg):
            clipboard_set(data)

pandas/tests/io/test_pickle.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,3 +598,20 @@ def test_pickle_frame_v124_unpickle_130():
598598

599599
expected = pd.DataFrame()
600600
tm.assert_frame_equal(df, expected)
601+
602+
603+
def test_read_pickle_forbidden():
    # related to CVE - https://nvd.nist.gov/vuln/detail/CVE-2020-13091
    # A pickle whose payload would call os.system must be rejected by
    # read_pickle with an UnpicklingError instead of being executed.

    class MyEvilPickle:
        def __reduce__(self):
            return (os.system, ("whoami",))

    pickle_data = pickle.dumps(MyEvilPickle())
    # Store the serialized payload in a temporary file that is cleaned up
    # afterwards, rather than leaving an artifact in the test data directory.
    with tm.ensure_clean("test_forbidden.pkl") as path:
        with open(path, "wb") as file:
            file.write(pickle_data)

        with pytest.raises(pickle.UnpicklingError, match=r".* forbidden"):
            pd.read_pickle(path)

0 commit comments

Comments
 (0)