From 33944c61142d961f4dfcbe3a38b4a6ca7680e64a Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Sun, 6 Jul 2025 16:17:28 +0200 Subject: [PATCH 1/6] Shared: Add shared concepts library --- .../internal/SensitiveDataHeuristics.qll | 188 ++++++++++++++++++ shared/concepts/qlpack.yml | 6 + 2 files changed, 194 insertions(+) create mode 100644 shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll create mode 100644 shared/concepts/qlpack.yml diff --git a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll new file mode 100644 index 000000000000..ede88ebf8149 --- /dev/null +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -0,0 +1,188 @@ +/** + * INTERNAL: Do not use. + * + * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. + * Such that we can share this logic across our CodeQL analysis of different languages. + * + * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. + */ + +/** + * A classification of different kinds of sensitive data: + * + * - secret: generic secret or trusted data; + * - id: a user name or other account information; + * - password: a password or authorization key; + * - certificate: a certificate. + * - private: private data such as credit card numbers + * + * While classifications are represented as strings, this should not be relied upon. + * Instead, use the predicates in `SensitiveDataClassification::` to work with + * classifications. + */ +class SensitiveDataClassification extends string { + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } +} + +/** + * Provides predicates to select the different kinds of sensitive data we support. + */ +module SensitiveDataClassification { + /** Gets the classification for secret or trusted data. 
*/ + SensitiveDataClassification secret() { result = "secret" } + + /** Gets the classification for user names or other account information. */ + SensitiveDataClassification id() { result = "id" } + + /** Gets the classification for passwords or authorization keys. */ + SensitiveDataClassification password() { result = "password" } + + /** Gets the classification for certificates. */ + SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } +} + +/** + * INTERNAL: Do not use. + * + * Provides heuristics for identifying names related to sensitive information. + */ +module HeuristicNames { + /** + * Gets a regular expression that identifies strings that may indicate the presence of secret + * or trusted data. + */ + string maybeSecret() { result = "(?is).*((? Date: Sun, 6 Jul 2025 16:27:50 +0200 Subject: [PATCH 2/6] Use shared `SensitiveDataHeuristics` --- javascript/ql/lib/qlpack.yml | 1 + .../ql/lib/semmle/javascript/security/SensitiveActions.qll | 2 +- python/ql/lib/qlpack.yml | 1 + .../lib/semmle/python/dataflow/new/SensitiveDataSources.qll | 2 +- ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll | 2 +- .../ql/lib/codeql/ruby/security/internal/CleartextSources.qll | 4 ++-- ruby/ql/lib/qlpack.yml | 1 + rust/ql/lib/codeql/rust/security/SensitiveData.qll | 2 +- rust/ql/lib/qlpack.yml | 1 + swift/ql/lib/codeql/swift/security/SensitiveExprs.qll | 2 +- swift/ql/lib/qlpack.yml | 1 + 11 files changed, 12 insertions(+), 7 deletions(-) diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index ea90eead8388..482612a821d2 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: javascript library: true upgrades: upgrades dependencies: + codeql/concepts: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} diff --git 
a/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll b/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll index fed330e5a6f5..05a6fbc17d46 100644 --- a/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll +++ b/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll @@ -10,7 +10,7 @@ */ import javascript -import semmle.javascript.security.internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import HeuristicNames /** An expression that might contain sensitive data. */ diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 87101c60e09c..dbf5d1cddbdb 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: python library: true upgrades: upgrades dependencies: + codeql/concepts: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} diff --git a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll index c12358f6db91..0e017c4a2295 100644 --- a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll @@ -7,7 +7,7 @@ private import python private import semmle.python.dataflow.new.DataFlow // Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range` private import semmle.python.Frameworks -private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics +private import codeql.concepts.internal.SensitiveDataHeuristics as SensitiveDataHeuristics private import semmle.python.ApiGraphs // We export these explicitly, so we don't also export the `HeuristicNames` module. 
diff --git a/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll b/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll index 34beb33604b7..e0dc68e7136e 100644 --- a/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll +++ b/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll @@ -11,7 +11,7 @@ private import codeql.ruby.AST private import codeql.ruby.DataFlow -import codeql.ruby.security.internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import HeuristicNames private import codeql.ruby.CFG diff --git a/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll b/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll index 3338bbf65f70..f2867fa14bfe 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll @@ -8,8 +8,8 @@ private import codeql.ruby.AST private import codeql.ruby.DataFlow private import codeql.ruby.TaintTracking::TaintTracking private import codeql.ruby.dataflow.RemoteFlowSources -private import SensitiveDataHeuristics::HeuristicNames -private import SensitiveDataHeuristics +private import codeql.concepts.internal.SensitiveDataHeuristics::HeuristicNames +private import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.ruby.CFG private import codeql.ruby.dataflow.SSA diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index ef9f163cbd91..34a70c90e8bf 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ dbscheme: ruby.dbscheme upgrades: upgrades library: true dependencies: + codeql/concepts: ${workspace} codeql/controlflow: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} diff --git a/rust/ql/lib/codeql/rust/security/SensitiveData.qll b/rust/ql/lib/codeql/rust/security/SensitiveData.qll index bf3364abdb6b..4e6ba21a2d28 100644 --- a/rust/ql/lib/codeql/rust/security/SensitiveData.qll +++ 
b/rust/ql/lib/codeql/rust/security/SensitiveData.qll @@ -6,7 +6,7 @@ */ import rust -import internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.rust.dataflow.DataFlow /** diff --git a/rust/ql/lib/qlpack.yml b/rust/ql/lib/qlpack.yml index f2a10f4c4f74..9b9398839ba5 100644 --- a/rust/ql/lib/qlpack.yml +++ b/rust/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ dbscheme: rust.dbscheme library: true upgrades: upgrades dependencies: + codeql/concepts: ${workspace} codeql/controlflow: ${workspace} codeql/dataflow: ${workspace} codeql/regex: ${workspace} diff --git a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll index 044b2a054d7f..d78f9b405327 100644 --- a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll +++ b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll @@ -5,7 +5,7 @@ */ import swift -import internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.swift.dataflow.DataFlow private import codeql.swift.dataflow.ExternalFlow diff --git a/swift/ql/lib/qlpack.yml b/swift/ql/lib/qlpack.yml index bd0816247ca6..ea03d8d389d1 100644 --- a/swift/ql/lib/qlpack.yml +++ b/swift/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ dbscheme: swift.dbscheme upgrades: upgrades library: true dependencies: + codeql/concepts: ${workspace} codeql/controlflow: ${workspace} codeql/dataflow: ${workspace} codeql/regex: ${workspace} From eaa04040c442aa50489c0ffd8e23095da86e6f3b Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Sun, 6 Jul 2025 16:33:08 +0200 Subject: [PATCH 3/6] Remove duplicate copies of `SensitiveDataHeuristics` --- config/identical-files.json | 7 - .../internal/SensitiveDataHeuristics.qll | 188 ------------------ .../internal/SensitiveDataHeuristics.qll | 188 ------------------ .../internal/SensitiveDataHeuristics.qll | 188 ------------------ .../internal/SensitiveDataHeuristics.qll | 188 ------------------ 
.../internal/SensitiveDataHeuristics.qll | 188 ------------------ 6 files changed, 947 deletions(-) delete mode 100644 javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll delete mode 100644 python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll delete mode 100644 ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll delete mode 100644 rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll delete mode 100644 swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll diff --git a/config/identical-files.json b/config/identical-files.json index 56aac5604734..a3da11e15e41 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -243,13 +243,6 @@ "ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll", "rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll" ], - "SensitiveDataHeuristics Python/JS": [ - "javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll", - "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll", - "ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll", - "swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll", - "rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll" - ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", "ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll" diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll deleted file mode 100644 index ede88ebf8149..000000000000 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ /dev/null @@ -1,188 +0,0 @@ -/** - * INTERNAL: Do not use. 
- * - * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. - * Such that we can share this logic across our CodeQL analysis of different languages. - * - * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. - */ - -/** - * A classification of different kinds of sensitive data: - * - * - secret: generic secret or trusted data; - * - id: a user name or other account information; - * - password: a password or authorization key; - * - certificate: a certificate. - * - private: private data such as credit card numbers - * - * While classifications are represented as strings, this should not be relied upon. - * Instead, use the predicates in `SensitiveDataClassification::` to work with - * classifications. - */ -class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } -} - -/** - * Provides predicates to select the different kinds of sensitive data we support. - */ -module SensitiveDataClassification { - /** Gets the classification for secret or trusted data. */ - SensitiveDataClassification secret() { result = "secret" } - - /** Gets the classification for user names or other account information. */ - SensitiveDataClassification id() { result = "id" } - - /** Gets the classification for passwords or authorization keys. */ - SensitiveDataClassification password() { result = "password" } - - /** Gets the classification for certificates. */ - SensitiveDataClassification certificate() { result = "certificate" } - - /** Gets the classification for private data. */ - SensitiveDataClassification private() { result = "private" } -} - -/** - * INTERNAL: Do not use. - * - * Provides heuristics for identifying names related to sensitive information. 
- */ -module HeuristicNames { - /** - * Gets a regular expression that identifies strings that may indicate the presence of secret - * or trusted data. - */ - string maybeSecret() { result = "(?is).*((? Date: Mon, 7 Jul 2025 11:29:13 +0200 Subject: [PATCH 4/6] Properly share `CryptoAlgorithms` and `CryptoAlgorithmNames` --- .../javascript/frameworks/CryptoLibraries.qll | 2 +- .../javascript/internal/ConceptsImports.qll | 2 +- .../javascript/security/CryptoAlgorithms.qll | 2 +- python/ql/lib/semmle/crypto/Crypto.qll | 2 +- .../python/concepts/CryptoAlgorithms.qll | 2 +- .../python/internal/ConceptsImports.qll | 2 +- .../codeql/ruby/internal/ConceptsImports.qll | 2 +- .../codeql/ruby/security/CryptoAlgorithms.qll | 2 +- ruby/ql/lib/codeql/ruby/security/OpenSSL.qll | 2 +- .../security/CryptoAlgorithms.ql | 2 +- .../codeql/rust/security/CryptoAlgorithms.qll | 2 +- .../codeql/concepts/CryptoAlgorithms.qll | 117 ++++++++++++++++++ .../internal/CryptoAlgorithmNames.qll | 84 +++++++++++++ 13 files changed, 212 insertions(+), 11 deletions(-) create mode 100644 shared/concepts/codeql/concepts/CryptoAlgorithms.qll create mode 100644 shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll diff --git a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll index db527c03f95d..9cc76b5f5b8c 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll @@ -4,7 +4,7 @@ import javascript import semmle.javascript.Concepts::Cryptography -private import semmle.javascript.security.internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A key used in a cryptographic algorithm. 
diff --git a/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll b/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll index aba7a83437a8..3aae9c05fb5e 100644 --- a/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll +++ b/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll @@ -4,4 +4,4 @@ */ import semmle.javascript.dataflow.DataFlow::DataFlow as DataFlow -import semmle.javascript.security.CryptoAlgorithms as CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms as CryptoAlgorithms diff --git a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll index 7176c666c573..01b568d234af 100644 --- a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll +++ b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll @@ -4,7 +4,7 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A cryptographic algorithm. diff --git a/python/ql/lib/semmle/crypto/Crypto.qll b/python/ql/lib/semmle/crypto/Crypto.qll index 5e2bb97a0aa7..0ea453989c88 100644 --- a/python/ql/lib/semmle/crypto/Crypto.qll +++ b/python/ql/lib/semmle/crypto/Crypto.qll @@ -1,3 +1,3 @@ /** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */ -import semmle.python.concepts.CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index 7176c666c573..01b568d234af 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -4,7 +4,7 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). 
*/ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A cryptographic algorithm. diff --git a/python/ql/lib/semmle/python/internal/ConceptsImports.qll b/python/ql/lib/semmle/python/internal/ConceptsImports.qll index 73ab482f829e..763b26017fbf 100644 --- a/python/ql/lib/semmle/python/internal/ConceptsImports.qll +++ b/python/ql/lib/semmle/python/internal/ConceptsImports.qll @@ -4,4 +4,4 @@ */ import semmle.python.dataflow.new.DataFlow -import semmle.python.concepts.CryptoAlgorithms as CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms as CryptoAlgorithms diff --git a/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll b/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll index 478fffe56aef..c0f99aafad6c 100644 --- a/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll +++ b/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll @@ -4,4 +4,4 @@ */ import codeql.ruby.DataFlow -import codeql.ruby.security.CryptoAlgorithms as CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms as CryptoAlgorithms diff --git a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll index 7176c666c573..01b568d234af 100644 --- a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll +++ b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll @@ -4,7 +4,7 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A cryptographic algorithm. diff --git a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll index 26f6d996f144..3775657fc123 100644 --- a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll +++ b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll @@ -3,7 +3,7 @@ * an underlying OpenSSL or LibreSSL C library. 
*/ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames private import codeql.ruby.Concepts private import codeql.ruby.DataFlow private import codeql.ruby.ApiGraphs diff --git a/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql b/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql index c4c42532d899..f19368e3656f 100644 --- a/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql +++ b/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql @@ -1,5 +1,5 @@ import codeql.ruby.AST -import codeql.ruby.security.CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms query predicate weakHashingAlgorithms(HashingAlgorithm ha) { ha.isWeak() } diff --git a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll index 7176c666c573..01b568d234af 100644 --- a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll +++ b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll @@ -4,7 +4,7 @@ * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A cryptographic algorithm. diff --git a/shared/concepts/codeql/concepts/CryptoAlgorithms.qll b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll new file mode 100644 index 000000000000..01b568d234af --- /dev/null +++ b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll @@ -0,0 +1,117 @@ +/** + * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. + * + * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). + */ + +private import codeql.concepts.internal.CryptoAlgorithmNames + +/** + * A cryptographic algorithm. 
+ */ +private newtype TCryptographicAlgorithm = + MkHashingAlgorithm(string name, boolean isWeak) { + isStrongHashingAlgorithm(name) and isWeak = false + or + isWeakHashingAlgorithm(name) and isWeak = true + } or + MkEncryptionAlgorithm(string name, boolean isWeak) { + isStrongEncryptionAlgorithm(name) and isWeak = false + or + isWeakEncryptionAlgorithm(name) and isWeak = true + } or + MkPasswordHashingAlgorithm(string name, boolean isWeak) { + isStrongPasswordHashingAlgorithm(name) and isWeak = false + or + isWeakPasswordHashingAlgorithm(name) and isWeak = true + } + +/** + * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. + * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. + * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. + */ +bindingset[name] +private CryptographicAlgorithm getBestAlgorithmForName(string name) { + result = + max(CryptographicAlgorithm algorithm | + algorithm.getName() = + [ + name.toUpperCase(), // the full name + name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces + name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores + ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces + | + algorithm order by algorithm.getName().length() + ) +} + +/** + * A cryptographic algorithm. + */ +abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { + /** Gets a textual representation of this element. 
*/ + string toString() { result = this.getName() } + + /** + * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). + */ + abstract string getName(); + + /** + * Holds if the name of this algorithm is the most specific match for `name`. + * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. + */ + bindingset[name] + predicate matchesName(string name) { this = getBestAlgorithmForName(name) } + + /** + * Holds if this algorithm is weak. + */ + abstract predicate isWeak(); +} + +/** + * A hashing algorithm such as `MD5` or `SHA512`. + */ +class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } +} + +/** + * An encryption algorithm such as `DES` or `AES512`. + */ +class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } + + /** Holds if this algorithm is a stream cipher. */ + predicate isStreamCipher() { isStreamCipher(name) } +} + +/** + * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. 
+ */ +class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } +} diff --git a/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll new file mode 100644 index 000000000000..8bb63d97876a --- /dev/null +++ b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll @@ -0,0 +1,84 @@ +/** + * Names of cryptographic algorithms, separated into strong and weak variants. + * + * The names are normalized: upper-case, no spaces, dashes or underscores. + * + * The names are inspired by the names used in real world crypto libraries. + * + * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). + */ + +/** + * Holds if `name` corresponds to a strong hashing algorithm. + */ +predicate isStrongHashingAlgorithm(string name) { + name = + [ + // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 + // and https://www.blake2.net/ + "BLAKE2", "BLAKE2B", "BLAKE2S", + // see https://github.com/BLAKE3-team/BLAKE3 + "BLAKE3", + // + "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", + "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", + // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 + "SHAKE128", "SHAKE256", + // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 + "SM3", + // see https://security.stackexchange.com/a/216297 + "WHIRLPOOL", + ] +} + +/** + * Holds if `name` corresponds to a weak hashing algorithm. 
+ */ +predicate isWeakHashingAlgorithm(string name) { + name = + [ + "HAVAL128", "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", + "RIPEMD160", "RIPEMD320", "SHA0", "SHA1" + ] +} + +/** + * Holds if `name` corresponds to a strong encryption algorithm. + */ +predicate isStrongEncryptionAlgorithm(string name) { + name = + [ + "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", + "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", + "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", + "IDEA", "RABBIT", "RSA", "SEED", "SM4" + ] +} + +/** + * Holds if `name` corresponds to a weak encryption algorithm. + */ +predicate isWeakEncryptionAlgorithm(string name) { + name = + [ + "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", + "ARCFOUR", "ARC5", "RC5" + ] +} + +/** + * Holds if `name` corresponds to a strong password hashing algorithm. + */ +predicate isStrongPasswordHashingAlgorithm(string name) { + name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] +} + +/** + * Holds if `name` corresponds to a weak password hashing algorithm. + */ +predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } + +/** + * Holds if `name` corresponds to a stream cipher. 
+ */ +predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } From 7517cb32531394c49eeb8f7b2f17e052ded4f1da Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Mon, 7 Jul 2025 11:39:12 +0200 Subject: [PATCH 5/6] Remove duplicate copies of `CryptoAlgorithms` and `CryptoAlgorithmNames` --- config/identical-files.json | 12 -- .../javascript/security/CryptoAlgorithms.qll | 114 +----------------- .../internal/CryptoAlgorithmNames.qll | 84 ------------- .../python/concepts/CryptoAlgorithms.qll | 114 +----------------- .../internal/CryptoAlgorithmNames.qll | 84 ------------- .../codeql/ruby/security/CryptoAlgorithms.qll | 114 +----------------- .../internal/CryptoAlgorithmNames.qll | 84 ------------- .../codeql/rust/security/CryptoAlgorithms.qll | 114 +----------------- .../internal/CryptoAlgorithmNames.qll | 84 ------------- 9 files changed, 4 insertions(+), 800 deletions(-) delete mode 100644 javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll delete mode 100644 python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll delete mode 100644 ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll delete mode 100644 rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll diff --git a/config/identical-files.json b/config/identical-files.json index a3da11e15e41..977f3f4a6473 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -231,18 +231,6 @@ "java/ql/src/experimental/Security/CWE/CWE-400/LocalThreadResourceAbuse.qhelp", "java/ql/src/experimental/Security/CWE/CWE-400/ThreadResourceAbuse.qhelp" ], - "CryptoAlgorithms Python/JS/Ruby": [ - "javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll", - "python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll", - "ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll", - "rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll" - ], - "CryptoAlgorithmNames Python/JS/Ruby": [ - 
"javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll", - "python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll", - "ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll", - "rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll" - ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", "ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll" diff --git a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll index 01b568d234af..f13d72312fe2 100644 --- a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll +++ b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import codeql.concepts.internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. 
- * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. 
- */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll b/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm.
- */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. 
- */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index 01b568d234af..13a03a3bd888 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import codeql.concepts.internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. 
This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. 
- */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. 
- */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. 
- */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll index 01b568d234af..13a03a3bd888 100644 --- a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll +++ b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import codeql.concepts.internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. 
This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. 
- */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll b/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. 
- */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. 
- */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll index 01b568d234af..13a03a3bd888 100644 --- a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll +++ b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import codeql.concepts.internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. 
This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. 
- */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll b/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. 
- */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. 
- */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } From 05a28b9be2ee14a9b1fe675ec456e65a43ba69ac Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Mon, 7 Jul 2025 11:55:50 +0200 Subject: [PATCH 6/6] Add overlay annotations --- shared/concepts/codeql/concepts/CryptoAlgorithms.qll | 2 ++ .../concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll | 2 ++ .../codeql/concepts/internal/SensitiveDataHeuristics.qll | 2 ++ 3 files changed, 6 insertions(+) diff --git a/shared/concepts/codeql/concepts/CryptoAlgorithms.qll b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll index 01b568d234af..23a45027cf14 100644 --- a/shared/concepts/codeql/concepts/CryptoAlgorithms.qll +++ b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll @@ -3,6 +3,8 @@ * * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ +overlay[local?] +module; private import codeql.concepts.internal.CryptoAlgorithmNames diff --git a/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll index 8bb63d97876a..efcd870c724a 100644 --- a/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll +++ b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll @@ -7,6 +7,8 @@ * * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ +overlay[local?] +module; /** * Holds if `name` corresponds to a strong hashing algorithm. 
diff --git a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll index ede88ebf8149..c50d1341c778 100644 --- a/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -6,6 +6,8 @@ * * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. */ +overlay[local?] +module; /** * A classification of different kinds of sensitive data: