Skip to content

Commit 802ee02

Browse files
committed
Python: Eliminate some FPs
Two main variants: cases where `__eq__` is not defined (which inherit `__hash__` automatically), and frozen dataclasses.
1 parent 682178a commit 802ee02

File tree

3 files changed

+92
-4
lines changed

3 files changed

+92
-4
lines changed

python/ql/src/Expressions/HashedButNoHash.ql

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,54 @@ predicate setsHashToNone(Class cls) {
2323
DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None
2424
}
2525

26+
/**
27+
* Holds if `cls` has a `@dataclass` decorator with `frozen=True` or `unsafe_hash=True`,
28+
* which generates a `__hash__` method at runtime.
29+
*/
30+
predicate hasDataclassHash(Class cls) {
31+
exists(DataFlow::CallCfgNode decorator |
32+
decorator = API::moduleImport("dataclasses").getMember("dataclass").getACall() and
33+
decorator.asExpr() = cls.getADecorator() and
34+
decorator.getArgByName(["frozen", "unsafe_hash"]).asExpr().(ImmutableLiteral).booleanValue() =
35+
true
36+
)
37+
}
38+
39+
/**
40+
* Holds if `cls` defines `__eq__` directly (not inherited from `object`),
41+
* or has a `@dataclass` decorator that generates `__eq__` (the default).
42+
*/
43+
predicate definesEq(Class cls) {
44+
DuckTyping::hasMethod(cls, "__eq__")
45+
or
46+
// @dataclass(...) call generates __eq__ unless eq=False
47+
exists(DataFlow::CallCfgNode decorator |
48+
decorator = API::moduleImport("dataclasses").getMember("dataclass").getACall() and
49+
decorator.asExpr() = cls.getADecorator() and
50+
not decorator.getArgByName("eq").asExpr().(ImmutableLiteral).booleanValue() = false
51+
)
52+
or
53+
// bare @dataclass without arguments also generates __eq__
54+
cls.getADecorator() =
55+
API::moduleImport("dataclasses").getMember("dataclass").getAValueReachableFromSource().asExpr()
56+
}
57+
2658
/**
2759
* Holds if `cls` is a user-defined class whose instances are unhashable.
28-
* A new-style class without `__hash__` is unhashable, as is one that explicitly
29-
* sets `__hash__ = None`.
60+
*
61+
* In Python, a class is unhashable if:
62+
* - It explicitly sets `__hash__ = None`, or
63+
* - It defines `__eq__` (directly or via decorator like `@dataclass`) without
64+
* defining `__hash__`, and doesn't have a decorator that generates `__hash__`.
3065
*/
3166
predicate isUnhashableUserClass(Class cls) {
67+
setsHashToNone(cls)
68+
or
3269
DuckTyping::isNewStyle(cls) and
70+
definesEq(cls) and
3371
not DuckTyping::hasMethod(cls, "__hash__") and
72+
not hasDataclassHash(cls) and
3473
not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls))
35-
or
36-
setsHashToNone(cls)
3774
}
3875

3976
/**
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
| expressions_test.py:42:20:42:25 | ControlFlowNode for unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | list |
2+
| expressions_test.py:299:17:299:19 | ControlFlowNode for obj | This $@ of $@ is unhashable. | expressions_test.py:298:11:298:23 | ControlFlowNode for HasEqNoHash() | instance | expressions_test.py:298:11:298:23 | ControlFlowNode for HasEqNoHash() | HasEqNoHash |
3+
| expressions_test.py:330:17:330:17 | ControlFlowNode for p | This $@ of $@ is unhashable. | expressions_test.py:329:9:329:27 | ControlFlowNode for MutableDataclass() | instance | expressions_test.py:329:9:329:27 | ControlFlowNode for MutableDataclass() | MutableDataclass |

python/ql/test/query-tests/Expressions/general/expressions_test.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,3 +279,52 @@ def local():
279279
def apply(f):
280280
pass
281281
apply(foo)([1])
282+
283+
# Classes without __eq__ are hashable (inherit __hash__ from object)
284+
class NoEqClass:
285+
def __init__(self, x):
286+
self.x = x
287+
288+
def hash_no_eq():
289+
obj = NoEqClass(1)
290+
return hash(obj) # OK: no __eq__, so __hash__ is inherited from object
291+
292+
# Classes with __eq__ but no __hash__ are unhashable
293+
class HasEqNoHash:
294+
def __eq__(self, other):
295+
return self.x == other.x
296+
297+
def hash_eq_no_hash():
298+
obj = HasEqNoHash()
299+
return hash(obj) # should be flagged
300+
301+
# @dataclass(frozen=True) generates __hash__, so instances are hashable
302+
from dataclasses import dataclass
303+
304+
@dataclass(frozen=True)
305+
class FrozenPoint:
306+
x: int
307+
y: int
308+
309+
def hash_frozen_dataclass():
310+
p = FrozenPoint(1, 2)
311+
return hash(p) # OK: frozen dataclass has __hash__
312+
313+
# @dataclass(unsafe_hash=True) also generates __hash__
314+
@dataclass(unsafe_hash=True)
315+
class UnsafeHashPoint:
316+
x: int
317+
y: int
318+
319+
def hash_unsafe_hash_dataclass():
320+
p = UnsafeHashPoint(1, 2)
321+
return hash(p) # OK: unsafe_hash=True generates __hash__
322+
323+
# Plain @dataclass without frozen/unsafe_hash and with __eq__ is unhashable
324+
@dataclass
325+
class MutableDataclass:
326+
x: int
327+
328+
def hash_mutable_dataclass():
329+
p = MutableDataclass(1)
330+
return hash(p) # should be flagged: @dataclass generates __eq__ but not __hash__

0 commit comments

Comments
 (0)