diff --git a/config/identical-files.json b/config/identical-files.json index 56aac5604734..89beb48acd40 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -231,35 +231,10 @@ "java/ql/src/experimental/Security/CWE/CWE-400/LocalThreadResourceAbuse.qhelp", "java/ql/src/experimental/Security/CWE/CWE-400/ThreadResourceAbuse.qhelp" ], - "CryptoAlgorithms Python/JS/Ruby": [ - "javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll", - "python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll", - "ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll", - "rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll" - ], - "CryptoAlgorithmNames Python/JS/Ruby": [ - "javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll", - "python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll", - "ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll", - "rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll" - ], - "SensitiveDataHeuristics Python/JS": [ - "javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll", - "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll", - "ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll", - "swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll", - "rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll" - ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", "ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll" ], - "Concepts Python/Ruby/JS": [ - "python/ql/lib/semmle/python/internal/ConceptsShared.qll", - "ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll", - "javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll", - "rust/ql/lib/codeql/rust/internal/ConceptsShared.qll" - ], "ApiGraphModels": [ "javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll", "ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll", diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index ce5076a8e164..4703915c8801 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: javascript library: true upgrades: upgrades dependencies: + codeql/concepts: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} diff --git a/javascript/ql/lib/semmle/javascript/Concepts.qll b/javascript/ql/lib/semmle/javascript/Concepts.qll index 3fce9f6f34a0..76c67156d1ca 100644 --- a/javascript/ql/lib/semmle/javascript/Concepts.qll +++ b/javascript/ql/lib/semmle/javascript/Concepts.qll @@ -5,7 +5,11 @@ */ import javascript +private import semmle.javascript.dataflow.internal.sharedlib.DataFlowArg private import codeql.threatmodels.ThreatModels +private import codeql.concepts.ConceptsShared + +private module ConceptsShared = ConceptsMake; /** * A data flow source, for a specific threat-model. @@ -206,7 +210,7 @@ abstract class PersistentWriteAccess extends DataFlow::Node { * Provides models for cryptographic things. */ module Cryptography { - private import semmle.javascript.internal.ConceptsShared::Cryptography as SC + private import ConceptsShared::Cryptography as SC /** * A data-flow node that is an application of a cryptographic algorithm. For example, diff --git a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll index db527c03f95d..9cc76b5f5b8c 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/CryptoLibraries.qll @@ -4,7 +4,7 @@ import javascript import semmle.javascript.Concepts::Cryptography -private import semmle.javascript.security.internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames /** * A key used in a cryptographic algorithm. diff --git a/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll b/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll deleted file mode 100644 index aba7a83437a8..000000000000 --- a/javascript/ql/lib/semmle/javascript/internal/ConceptsImports.qll +++ /dev/null @@ -1,7 +0,0 @@ -/** - * This file contains imports required for the JavaScript version of `ConceptsShared.qll`. - * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages. - */ - -import semmle.javascript.dataflow.DataFlow::DataFlow as DataFlow -import semmle.javascript.security.CryptoAlgorithms as CryptoAlgorithms diff --git a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll b/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll deleted file mode 100644 index 1b13e4ebb17e..000000000000 --- a/javascript/ql/lib/semmle/javascript/internal/ConceptsShared.qll +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Provides Concepts which are shared across languages. - * - * Each language has a language specific `Concepts.qll` file that can import the - * shared concepts from this file. A language can either re-export the concept directly, - * or can add additional member-predicates that are needed for that language. - * - * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from - * each language, but we will maintain a discipline of moving those concepts to - * `ConceptsShared.qll` ASAP. - */ - -private import ConceptsImports - -/** - * Provides models for cryptographic concepts. - * - * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into - * consideration for the `isWeak` member predicate. So RSA is always considered - * secure, although using a low number of bits will actually make it insecure. We plan - * to improve our libraries in the future to more precisely capture this aspect. - */ -module Cryptography { - class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - - class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - - class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - - class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; - - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `CryptographicOperation::Range` instead. - */ - class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } - - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - DataFlow::Node getInitialization() { result = super.getInitialization() } - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - BlockMode getBlockMode() { result = super.getBlockMode() } - } - - /** Provides classes for modeling new applications of a cryptographic algorithms. */ - module CryptographicOperation { - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `CryptographicOperation` instead. - */ - abstract class Range extends DataFlow::Node { - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - abstract DataFlow::Node getInitialization(); - - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - abstract CryptographicAlgorithm getAlgorithm(); - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - abstract DataFlow::Node getAnInput(); - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - abstract BlockMode getBlockMode(); - } - } - - /** - * A cryptographic block cipher mode of operation. This can be used to encrypt - * data of arbitrary length using a block encryption algorithm. - */ - class BlockMode extends string { - BlockMode() { - this = - [ - "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP", - "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final - "EAX" // https://en.wikipedia.org/wiki/EAX_mode - ] - } - - /** Holds if this block mode is considered to be insecure. */ - predicate isWeak() { this = "ECB" } - - /** Holds if the given string appears to match this block mode. */ - bindingset[s] - predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") } - } -} - -/** Provides classes for modeling HTTP-related APIs. */ -module Http { - /** Provides classes for modeling HTTP clients. */ - module Client { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `Http::Client::Request::Range` instead. - */ - class Request extends DataFlow::Node instanceof Request::Range { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - DataFlow::Node getAUrlPart() { result = super.getAUrlPart() } - - /** Gets a string that identifies the framework used for this request. */ - string getFramework() { result = super.getFramework() } - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ) { - super.disablesCertificateValidation(disablingNode, argumentOrigin) - } - } - - /** Provides a class for modeling new HTTP requests. */ - module Request { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `Http::Client::Request` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - abstract DataFlow::Node getAUrlPart(); - - /** Gets a string that identifies the framework used for this request. */ - abstract string getFramework(); - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - abstract predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ); - } - } - } -} diff --git a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll index 7176c666c573..f13d72312fe2 100644 --- a/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll +++ b/javascript/ql/lib/semmle/javascript/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. - */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +private import codeql.concepts.CryptoAlgorithms diff --git a/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll b/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll index fed330e5a6f5..05a6fbc17d46 100644 --- a/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll +++ b/javascript/ql/lib/semmle/javascript/security/SensitiveActions.qll @@ -10,7 +10,7 @@ */ import javascript -import semmle.javascript.security.internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import HeuristicNames /** An expression that might contain sensitive data. */ diff --git a/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll b/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/javascript/ql/lib/semmle/javascript/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. - */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. - */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll deleted file mode 100644 index ede88ebf8149..000000000000 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ /dev/null @@ -1,188 +0,0 @@ -/** - * INTERNAL: Do not use. - * - * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. - * Such that we can share this logic across our CodeQL analysis of different languages. - * - * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. - */ - -/** - * A classification of different kinds of sensitive data: - * - * - secret: generic secret or trusted data; - * - id: a user name or other account information; - * - password: a password or authorization key; - * - certificate: a certificate. - * - private: private data such as credit card numbers - * - * While classifications are represented as strings, this should not be relied upon. - * Instead, use the predicates in `SensitiveDataClassification::` to work with - * classifications. - */ -class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } -} - -/** - * Provides predicates to select the different kinds of sensitive data we support. - */ -module SensitiveDataClassification { - /** Gets the classification for secret or trusted data. */ - SensitiveDataClassification secret() { result = "secret" } - - /** Gets the classification for user names or other account information. */ - SensitiveDataClassification id() { result = "id" } - - /** Gets the classification for passwords or authorization keys. */ - SensitiveDataClassification password() { result = "password" } - - /** Gets the classification for certificates. */ - SensitiveDataClassification certificate() { result = "certificate" } - - /** Gets the classification for private data. */ - SensitiveDataClassification private() { result = "private" } -} - -/** - * INTERNAL: Do not use. - * - * Provides heuristics for identifying names related to sensitive information. - */ -module HeuristicNames { - /** - * Gets a regular expression that identifies strings that may indicate the presence of secret - * or trusted data. - */ - string maybeSecret() { result = "(?is).*((?; /** * A data flow source, for a specific threat-model. @@ -1617,7 +1622,7 @@ module Http { } } - import semmle.python.internal.ConceptsShared::Http::Client as Client + import ConceptsShared::Http::Client as Client // TODO: investigate whether we should treat responses to client requests as // remote-flow-sources in general. } @@ -1725,5 +1730,5 @@ module Cryptography { } } - import semmle.python.internal.ConceptsShared::Cryptography + import ConceptsShared::Cryptography } diff --git a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll index 7176c666c573..13a03a3bd888 100644 --- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll +++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. - */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll index c12358f6db91..0e017c4a2295 100644 --- a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll @@ -7,7 +7,7 @@ private import python private import semmle.python.dataflow.new.DataFlow // Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range` private import semmle.python.Frameworks -private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics +private import codeql.concepts.internal.SensitiveDataHeuristics as SensitiveDataHeuristics private import semmle.python.ApiGraphs // We export these explicitly, so we don't also export the `HeuristicNames` module. diff --git a/python/ql/lib/semmle/python/frameworks/Aiohttp.qll b/python/ql/lib/semmle/python/frameworks/Aiohttp.qll index 2f5b0393c5f3..43185efef771 100644 --- a/python/ql/lib/semmle/python/frameworks/Aiohttp.qll +++ b/python/ql/lib/semmle/python/frameworks/Aiohttp.qll @@ -758,7 +758,7 @@ module AiohttpClientModel { private API::Node instance() { result = classRef().getReturn() } /** A method call on a ClientSession that sends off a request */ - private class OutgoingRequestCall extends Http::Client::Request::Range, API::CallNode { + private class OutgoingRequestCall extends Http::Client::Request::Range instanceof API::CallNode { string methodName; OutgoingRequestCall() { @@ -767,13 +767,13 @@ module AiohttpClientModel { } override DataFlow::Node getAUrlPart() { - result = this.getArgByName("url") + result = super.getArgByName("url") or methodName in [Http::httpVerbLower(), "ws_connect"] and - result = this.getArg(0) + result = super.getArg(0) or methodName = "request" and - result = this.getArg(1) + result = super.getArg(1) } override string getFramework() { result = "aiohttp.ClientSession" } @@ -781,7 +781,7 @@ module AiohttpClientModel { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - exists(API::Node param | param = this.getKeywordParameter(["ssl", "verify_ssl"]) | + exists(API::Node param | param = super.getKeywordParameter(["ssl", "verify_ssl"]) | disablingNode = param.asSink() and argumentOrigin = param.getAValueReachingSink() and // aiohttp.client treats `None` as the default and all other "falsey" values as `False`. diff --git a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll index 4dc193b13866..81025e27d782 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptodome.qll @@ -107,8 +107,7 @@ private module CryptodomeModel { /** * A cryptographic operation on an instance from the `Cipher` subpackage of `Cryptodome`/`Crypto`. */ - class CryptodomeGenericCipherOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::CallCfgNode + class CryptodomeGenericCipherOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode { string methodName; string cipherName; @@ -134,31 +133,31 @@ private module CryptodomeModel { override DataFlow::Node getAnInput() { methodName = "encrypt" and - result in [this.getArg(0), this.getArgByName(["message", "plaintext"])] + result in [super.getArg(0), super.getArgByName(["message", "plaintext"])] or methodName = "decrypt" and - result in [this.getArg(0), this.getArgByName("ciphertext")] + result in [super.getArg(0), super.getArgByName("ciphertext")] or // for the following methods, method signatures can be found in // https://pycryptodome.readthedocs.io/en/latest/src/cipher/modern.html methodName = "update" and - result in [this.getArg(0), this.getArgByName("data")] + result in [super.getArg(0), super.getArgByName("data")] or // although `mac_tag` is used as the parameter name in the spec above, some implementations use `received_mac_tag`, for an example, see // https://github.com/Legrandin/pycryptodome/blob/5dace638b70ac35bb5d9b565f3e75f7869c9d851/lib/Crypto/Cipher/ChaCha20_Poly1305.py#L207 methodName = "verify" and - result in [this.getArg(0), this.getArgByName(["mac_tag", "received_mac_tag"])] + result in [super.getArg(0), super.getArgByName(["mac_tag", "received_mac_tag"])] or methodName = "hexverify" and - result in [this.getArg(0), this.getArgByName("mac_tag_hex")] + result in [super.getArg(0), super.getArgByName("mac_tag_hex")] or methodName = "encrypt_and_digest" and - result in [this.getArg(0), this.getArgByName("plaintext")] + result in [super.getArg(0), super.getArgByName("plaintext")] or methodName = "decrypt_and_verify" and result in [ - this.getArg(0), this.getArgByName("ciphertext"), this.getArg(1), - this.getArgByName("mac_tag") + super.getArg(0), super.getArgByName("ciphertext"), super.getArg(1), + super.getArgByName("mac_tag") ] } @@ -180,8 +179,7 @@ private module CryptodomeModel { /** * A cryptographic operation on an instance from the `Signature` subpackage of `Cryptodome`/`Crypto`. */ - class CryptodomeGenericSignatureOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::CallCfgNode + class CryptodomeGenericSignatureOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode { API::CallNode newCall; string methodName; @@ -206,13 +204,13 @@ private module CryptodomeModel { override DataFlow::Node getAnInput() { methodName = "sign" and - result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance + result in [super.getArg(0), super.getArgByName("msg_hash")] // Cryptodome.Hash instance or methodName = "verify" and ( - result in [this.getArg(0), this.getArgByName("msg_hash")] // Cryptodome.Hash instance + result in [super.getArg(0), super.getArgByName("msg_hash")] // Cryptodome.Hash instance or - result in [this.getArg(1), this.getArgByName("signature")] + result in [super.getArg(1), super.getArgByName("signature")] ) } @@ -222,8 +220,7 @@ private module CryptodomeModel { /** * A cryptographic operation on an instance from the `Hash` subpackage of `Cryptodome`/`Crypto`. */ - class CryptodomeGenericHashOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::CallCfgNode + class CryptodomeGenericHashOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode { API::CallNode newCall; string hashName; @@ -244,7 +241,7 @@ private module CryptodomeModel { override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + override DataFlow::Node getAnInput() { result in [super.getArg(0), super.getArgByName("data")] } override Cryptography::BlockMode getBlockMode() { none() } } diff --git a/python/ql/lib/semmle/python/frameworks/Cryptography.qll b/python/ql/lib/semmle/python/frameworks/Cryptography.qll index 41d69d064d73..8996f2d2c08c 100644 --- a/python/ql/lib/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/lib/semmle/python/frameworks/Cryptography.qll @@ -206,8 +206,7 @@ private module CryptographyModel { /** * An encrypt or decrypt operation from `cryptography.hazmat.primitives.ciphers`. */ - class CryptographyGenericCipherOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::MethodCallNode + class CryptographyGenericCipherOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::MethodCallNode { API::CallNode init; string algorithmName; @@ -225,7 +224,9 @@ private module CryptographyModel { result.matchesName(algorithmName) } - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + override DataFlow::Node getAnInput() { + result in [super.getArg(0), super.getArgByName("data")] + } override Cryptography::BlockMode getBlockMode() { result = modeName } } @@ -263,8 +264,7 @@ private module CryptographyModel { /** * An hashing operation from `cryptography.hazmat.primitives.hashes`. */ - class CryptographyGenericHashOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::MethodCallNode + class CryptographyGenericHashOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::MethodCallNode { API::CallNode init; string algorithmName; @@ -280,7 +280,9 @@ private module CryptographyModel { result.matchesName(algorithmName) } - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + override DataFlow::Node getAnInput() { + result in [super.getArg(0), super.getArgByName("data")] + } override Cryptography::BlockMode getBlockMode() { none() } } diff --git a/python/ql/lib/semmle/python/frameworks/Httpx.qll b/python/ql/lib/semmle/python/frameworks/Httpx.qll index fee9a840a193..bca1c25b4deb 100644 --- a/python/ql/lib/semmle/python/frameworks/Httpx.qll +++ b/python/ql/lib/semmle/python/frameworks/Httpx.qll @@ -26,7 +26,7 @@ module HttpxModel { * * See https://www.python-httpx.org/api/ */ - private class RequestCall extends Http::Client::Request::Range, API::CallNode { + private class RequestCall extends Http::Client::Request::Range instanceof API::CallNode { string methodName; RequestCall() { @@ -35,11 +35,11 @@ module HttpxModel { } override DataFlow::Node getAUrlPart() { - result = this.getArgByName("url") + result = super.getArgByName("url") or if methodName in ["request", "stream"] - then result = this.getArg(1) - else result = this.getArg(0) + then result = super.getArg(1) + else result = super.getArg(0) } override string getFramework() { result = "httpx" } @@ -47,8 +47,8 @@ module HttpxModel { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - disablingNode = this.getKeywordParameter("verify").asSink() and - argumentOrigin = this.getKeywordParameter("verify").getAValueReachingSink() and + disablingNode = super.getKeywordParameter("verify").asSink() and + argumentOrigin = super.getKeywordParameter("verify").getAValueReachingSink() and // unlike `requests`, httpx treats `None` as turning off verify (and not as the default) argumentOrigin.asExpr().(ImmutableLiteral).booleanValue() = false // TODO: Handling of insecure SSLContext passed to verify argument @@ -69,7 +69,8 @@ module HttpxModel { } /** A method call on a Client that sends off a request */ - private class OutgoingRequestCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class OutgoingRequestCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode + { string methodName; OutgoingRequestCall() { @@ -78,11 +79,11 @@ module HttpxModel { } override DataFlow::Node getAUrlPart() { - result = this.getArgByName("url") + result = super.getArgByName("url") or if methodName in ["request", "stream"] - then result = this.getArg(1) - else result = this.getArg(0) + then result = super.getArg(1) + else result = super.getArg(0) } override string getFramework() { result = "httpx.[Async]Client" } diff --git a/python/ql/lib/semmle/python/frameworks/Libtaxii.qll b/python/ql/lib/semmle/python/frameworks/Libtaxii.qll index 5df0e7127ecf..65dbb79566c9 100644 --- a/python/ql/lib/semmle/python/frameworks/Libtaxii.qll +++ b/python/ql/lib/semmle/python/frameworks/Libtaxii.qll @@ -22,13 +22,13 @@ private module Libtaxii { * A call to `libtaxii.common.parse`. * When the `allow_url` parameter value is set to `True`, there is an SSRF vulnerability.. */ - private class ParseCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class ParseCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { ParseCall() { this = API::moduleImport("libtaxii").getMember("common").getMember("parse").getACall() and this.getArgByName("allow_url").getALocalSource().asExpr() = any(True t) } - override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("s")] } + override DataFlow::Node getAUrlPart() { result in [super.getArg(0), super.getArgByName("s")] } override string getFramework() { result = "libtaxii.common.parse" } diff --git a/python/ql/lib/semmle/python/frameworks/Pycurl.qll b/python/ql/lib/semmle/python/frameworks/Pycurl.qll index 9ce97388662a..7280eec5f61c 100644 --- a/python/ql/lib/semmle/python/frameworks/Pycurl.qll +++ b/python/ql/lib/semmle/python/frameworks/Pycurl.qll @@ -52,14 +52,15 @@ module Pycurl { * * See http://pycurl.io/docs/latest/curlobject.html#pycurl.Curl.setopt. */ - private class OutgoingRequestCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class OutgoingRequestCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode + { OutgoingRequestCall() { this = setopt().getACall() and this.getArg(0).asCfgNode().(AttrNode).getName() = "URL" } override DataFlow::Node getAUrlPart() { - result in [this.getArg(1), this.getArgByName("value")] + result in [super.getArg(1), super.getArgByName("value")] } override string getFramework() { result = "pycurl.Curl" } @@ -77,7 +78,7 @@ module Pycurl { * * See http://pycurl.io/docs/latest/curlobject.html#pycurl.Curl.setopt. */ - private class CurlSslCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class CurlSslCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { CurlSslCall() { this = setopt().getACall() and this.getArg(0).asCfgNode().(AttrNode).getName() = ["SSL_VERIFYPEER", "SSL_VERIFYHOST"] @@ -90,13 +91,13 @@ module Pycurl { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - sslverifypeer().getAValueReachableFromSource() = this.getArg(0) and + sslverifypeer().getAValueReachableFromSource() = super.getArg(0) and ( - this.getArg(1).asExpr().(IntegerLiteral).getValue() = 0 + super.getArg(1).asExpr().(IntegerLiteral).getValue() = 0 or - this.getArg(1).asExpr().(BooleanLiteral).booleanValue() = false + super.getArg(1).asExpr().(BooleanLiteral).booleanValue() = false ) and - (disablingNode = this and argumentOrigin = this.getArg(1)) + (disablingNode = this and argumentOrigin = super.getArg(1)) } } } diff --git a/python/ql/lib/semmle/python/frameworks/Requests.qll b/python/ql/lib/semmle/python/frameworks/Requests.qll index e47ac10df3e5..4c8038787c96 100644 --- a/python/ql/lib/semmle/python/frameworks/Requests.qll +++ b/python/ql/lib/semmle/python/frameworks/Requests.qll @@ -29,7 +29,7 @@ module Requests { * * See https://requests.readthedocs.io/en/latest/api/#requests.request */ - private class OutgoingRequestCall extends Http::Client::Request::Range, API::CallNode { + private class OutgoingRequestCall extends Http::Client::Request::Range instanceof API::CallNode { string methodName; OutgoingRequestCall() { @@ -50,20 +50,20 @@ module Requests { } override DataFlow::Node getAUrlPart() { - result = this.getArgByName("url") + result = super.getArgByName("url") or not methodName = "request" and - result = this.getArg(0) + result = super.getArg(0) or methodName = "request" and - result = this.getArg(1) + result = super.getArg(1) } override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - disablingNode = this.getKeywordParameter("verify").asSink() and - argumentOrigin = this.getKeywordParameter("verify").getAValueReachingSink() and + disablingNode = super.getKeywordParameter("verify").asSink() and + argumentOrigin = super.getKeywordParameter("verify").getAValueReachingSink() and // requests treats `None` as the default and all other "falsey" values as `False`. argumentOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and not argumentOrigin.asExpr() instanceof None diff --git a/python/ql/lib/semmle/python/frameworks/Rsa.qll b/python/ql/lib/semmle/python/frameworks/Rsa.qll index 0f0dd2d3d927..75fb4e3a6338 100644 --- a/python/ql/lib/semmle/python/frameworks/Rsa.qll +++ b/python/ql/lib/semmle/python/frameworks/Rsa.qll @@ -34,7 +34,8 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.encrypt */ - class RsaEncryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { + class RsaEncryptCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode + { RsaEncryptCall() { this = API::moduleImport("rsa").getMember("encrypt").getACall() } override DataFlow::Node getInitialization() { result = this } @@ -42,7 +43,7 @@ private module Rsa { override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("message")] + result in [super.getArg(0), super.getArgByName("message")] } override Cryptography::BlockMode getBlockMode() { none() } @@ -53,14 +54,17 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.decrypt */ - class RsaDecryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { + class RsaDecryptCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode + { RsaDecryptCall() { this = API::moduleImport("rsa").getMember("decrypt").getACall() } override DataFlow::Node getInitialization() { result = this } override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("crypto")] } + override DataFlow::Node getAnInput() { + result in [super.getArg(0), super.getArgByName("crypto")] + } override Cryptography::BlockMode getBlockMode() { none() } } @@ -70,7 +74,8 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.sign */ - class RsaSignCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { + class RsaSignCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode + { RsaSignCall() { this = API::moduleImport("rsa").getMember("sign").getACall() } override DataFlow::Node getInitialization() { result = this } @@ -81,14 +86,14 @@ private module Rsa { or // hashing part exists(StringLiteral str, DataFlow::Node hashNameArg | - hashNameArg in [this.getArg(2), this.getArgByName("hash_method")] and + hashNameArg in [super.getArg(2), super.getArgByName("hash_method")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and result.matchesName(str.getText()) ) } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("message")] + result in [super.getArg(0), super.getArgByName("message")] } override Cryptography::BlockMode getBlockMode() { none() } @@ -99,7 +104,8 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.verify */ - class RsaVerifyCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { + class RsaVerifyCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode + { RsaVerifyCall() { this = API::moduleImport("rsa").getMember("verify").getACall() } override DataFlow::Node getInitialization() { result = this } @@ -111,9 +117,9 @@ private module Rsa { } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("message")] + result in [super.getArg(0), super.getArgByName("message")] or - result in [this.getArg(1), this.getArgByName("signature")] + result in [super.getArg(1), super.getArgByName("signature")] } override Cryptography::BlockMode getBlockMode() { none() } @@ -124,8 +130,7 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.compute_hash */ - class RsaComputeHashCall extends Cryptography::CryptographicOperation::Range, - DataFlow::CallCfgNode + class RsaComputeHashCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode { RsaComputeHashCall() { this = API::moduleImport("rsa").getMember("compute_hash").getACall() } @@ -133,14 +138,14 @@ private module Rsa { override Cryptography::CryptographicAlgorithm getAlgorithm() { exists(StringLiteral str, DataFlow::Node hashNameArg | - hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and + hashNameArg in [super.getArg(1), super.getArgByName("method_name")] and DataFlow::exprNode(str) = hashNameArg.getALocalSource() and result.matchesName(str.getText()) ) } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("message")] + result in [super.getArg(0), super.getArgByName("message")] } override Cryptography::BlockMode getBlockMode() { none() } @@ -151,7 +156,8 @@ private module Rsa { * * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.sign_hash */ - class RsaSignHashCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode { + class RsaSignHashCall extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode + { RsaSignHashCall() { this = API::moduleImport("rsa").getMember("sign_hash").getACall() } override DataFlow::Node getInitialization() { result = this } @@ -159,7 +165,7 @@ private module Rsa { override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" } override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("hash_value")] + result in [super.getArg(0), super.getArgByName("hash_value")] } override Cryptography::BlockMode getBlockMode() { none() } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 4a3c346fb016..ceb2f1952a02 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2385,15 +2385,16 @@ module StdlibPrivate { } /** A method call on a HttpConnection that sends off a request */ - private class RequestCall extends Http::Client::Request::Range, DataFlow::MethodCallNode { + private class RequestCall extends Http::Client::Request::Range instanceof DataFlow::MethodCallNode + { RequestCall() { this.calls(instance(_), ["request", "_send_request", "putrequest"]) } - DataFlow::Node getUrlArg() { result in [this.getArg(1), this.getArgByName("url")] } + DataFlow::Node getUrlArg() { result in [super.getArg(1), super.getArgByName("url")] } override DataFlow::Node getAUrlPart() { result = this.getUrlArg() or - this.getObject() = instance(result) + super.getObject() = instance(result) } override string getFramework() { result = "http.client.HTTP[S]Connection" } @@ -2430,7 +2431,8 @@ module StdlibPrivate { // a request method exists(RequestCall call | nodeFrom = call.getUrlArg() and - nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() = call.getObject() + nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() = + call.(DataFlow::MethodCallNode).getObject() ) or // `getresponse` call @@ -2797,7 +2799,7 @@ module StdlibPrivate { /** * A hashing operation by supplying initial data when calling the `hashlib.new` function. */ - class HashlibNewCall extends Cryptography::CryptographicOperation::Range, API::CallNode { + class HashlibNewCall extends Cryptography::CryptographicOperation::Range instanceof API::CallNode { string hashName; HashlibNewCall() { @@ -2810,7 +2812,7 @@ module StdlibPrivate { override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } - override DataFlow::Node getAnInput() { result = this.getParameter(1, "data").asSink() } + override DataFlow::Node getAnInput() { result = super.getParameter(1, "data").asSink() } override Cryptography::BlockMode getBlockMode() { none() } } @@ -2818,7 +2820,8 @@ module StdlibPrivate { /** * A hashing operation by using the `update` method on the result of calling the `hashlib.new` function. */ - class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range, API::CallNode { + class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range instanceof API::CallNode + { API::CallNode init; string hashName; @@ -2831,7 +2834,7 @@ module StdlibPrivate { override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result = super.getArg(0) } override Cryptography::BlockMode getBlockMode() { none() } } @@ -2848,8 +2851,7 @@ module StdlibPrivate { * (such as `hashlib.md5`). `hashlib.new` is not included, since it is handled by * `HashlibNewCall` and `HashlibNewUpdateCall`. */ - abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::CallCfgNode + abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallCfgNode { string hashName; API::Node hashClass; @@ -2876,7 +2878,7 @@ module StdlibPrivate { override DataFlow::Node getInitialization() { result = init } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result = this.(DataFlow::CallCfgNode).getArg(0) } } /** @@ -2888,24 +2890,28 @@ module StdlibPrivate { // we only want to model calls to classes such as `hashlib.md5()` if initial data // is passed as an argument this = hashClass.getACall() and - exists([this.getArg(0), this.getArgByName("string")]) + exists( + [ + this.(DataFlow::CallCfgNode).getArg(0), + this.(DataFlow::CallCfgNode).getArgByName("string") + ] + ) } override DataFlow::Node getInitialization() { result = this } override DataFlow::Node getAnInput() { - result = this.getArg(0) + result = this.(DataFlow::CallCfgNode).getArg(0) or // in Python 3.9, you are allowed to use `hashlib.md5(string=)`. - result = this.getArgByName("string") + result = this.(DataFlow::CallCfgNode).getArgByName("string") } } // --------------------------------------------------------------------------- // hmac // --------------------------------------------------------------------------- - abstract class HmacCryptographicOperation extends Cryptography::CryptographicOperation::Range, - API::CallNode + abstract class HmacCryptographicOperation extends Cryptography::CryptographicOperation::Range instanceof API::CallNode { abstract API::Node getDigestArg(); @@ -2937,14 +2943,16 @@ module StdlibPrivate { HmacNewCall() { this = getHmacConstructorCall(digestArg) and // we only want to consider it as an cryptographic operation if the input is available - exists(this.getParameter(1, "msg").asSink()) + exists(this.(API::CallNode).getParameter(1, "msg").asSink()) } override DataFlow::Node getInitialization() { result = this } override API::Node getDigestArg() { result = digestArg } - override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() } + override DataFlow::Node getAnInput() { + result = this.(API::CallNode).getParameter(1, "msg").asSink() + } } /** @@ -2965,7 +2973,9 @@ module StdlibPrivate { override API::Node getDigestArg() { result = digestArg } - override DataFlow::Node getAnInput() { result = this.getParameter(0, "msg").asSink() } + override DataFlow::Node getAnInput() { + result = this.(API::CallNode).getParameter(0, "msg").asSink() + } } /** @@ -2978,9 +2988,11 @@ module StdlibPrivate { override DataFlow::Node getInitialization() { result = this } - override API::Node getDigestArg() { result = this.getParameter(2, "digest") } + override API::Node getDigestArg() { result = this.(API::CallNode).getParameter(2, "digest") } - override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() } + override DataFlow::Node getAnInput() { + result = this.(API::CallNode).getParameter(1, "msg").asSink() + } } // --------------------------------------------------------------------------- diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll index fc385a2f8ed9..6b5764e55925 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll @@ -31,12 +31,14 @@ private module Urllib { * See * - https://docs.python.org/3.9/library/urllib.request.html#urllib.request.Request */ - private class RequestCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class RequestCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { RequestCall() { this = API::moduleImport("urllib").getMember("request").getMember("Request").getACall() } - override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] } + override DataFlow::Node getAUrlPart() { + result in [super.getArg(0), super.getArgByName("url")] + } override string getFramework() { result = "urllib.request.Request" } @@ -53,12 +55,14 @@ private module Urllib { * See * - https://docs.python.org/3.9/library/urllib.request.html#urllib.request.urlopen */ - private class UrlOpenCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class UrlOpenCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { UrlOpenCall() { this = API::moduleImport("urllib").getMember("request").getMember("urlopen").getACall() } - override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] } + override DataFlow::Node getAUrlPart() { + result in [super.getArg(0), super.getArgByName("url")] + } override string getFramework() { result = "urllib.request.urlopen" } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib2.qll b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib2.qll index d440b1852c93..39f2a336d85e 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib2.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib/Urllib2.qll @@ -20,10 +20,10 @@ private module Urllib2 { * See * - https://docs.python.org/2/library/urllib2.html#urllib2.Request */ - private class RequestCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class RequestCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { RequestCall() { this = API::moduleImport("urllib2").getMember("Request").getACall() } - override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] } + override DataFlow::Node getAUrlPart() { result in [super.getArg(0), super.getArgByName("url")] } override string getFramework() { result = "urllib2.Request" } @@ -40,10 +40,10 @@ private module Urllib2 { * See * - https://docs.python.org/2/library/urllib2.html#urllib2.urlopen */ - private class UrlOpenCall extends Http::Client::Request::Range, DataFlow::CallCfgNode { + private class UrlOpenCall extends Http::Client::Request::Range instanceof DataFlow::CallCfgNode { UrlOpenCall() { this = API::moduleImport("urllib2").getMember("urlopen").getACall() } - override DataFlow::Node getAUrlPart() { result in [this.getArg(0), this.getArgByName("url")] } + override DataFlow::Node getAUrlPart() { result in [super.getArg(0), super.getArgByName("url")] } override string getFramework() { result = "urllib2.urlopen" } diff --git a/python/ql/lib/semmle/python/frameworks/Urllib3.qll b/python/ql/lib/semmle/python/frameworks/Urllib3.qll index ee35fc9af0a9..02705527e574 100644 --- a/python/ql/lib/semmle/python/frameworks/Urllib3.qll +++ b/python/ql/lib/semmle/python/frameworks/Urllib3.qll @@ -54,7 +54,7 @@ module Urllib3 { * - https://urllib3.readthedocs.io/en/stable/reference/urllib3.request.html#urllib3.request.RequestMethods * - https://urllib3.readthedocs.io/en/stable/reference/urllib3.connectionpool.html#urllib3.HTTPConnectionPool.urlopen */ - private class RequestCall extends Http::Client::Request::Range, API::CallNode { + private class RequestCall extends Http::Client::Request::Range instanceof API::CallNode { RequestCall() { this = classRef() @@ -63,7 +63,9 @@ module Urllib3 { .getACall() } - override DataFlow::Node getAUrlPart() { result in [this.getArg(1), this.getArgByName("url")] } + override DataFlow::Node getAUrlPart() { + result in [super.getArg(1), super.getArgByName("url")] + } override string getFramework() { result = "urllib3.PoolManager" } diff --git a/python/ql/lib/semmle/python/internal/ConceptsImports.qll b/python/ql/lib/semmle/python/internal/ConceptsImports.qll deleted file mode 100644 index 73ab482f829e..000000000000 --- a/python/ql/lib/semmle/python/internal/ConceptsImports.qll +++ /dev/null @@ -1,7 +0,0 @@ -/** - * This file contains imports required for the Python version of `ConceptsShared.qll`. - * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages. - */ - -import semmle.python.dataflow.new.DataFlow -import semmle.python.concepts.CryptoAlgorithms as CryptoAlgorithms diff --git a/python/ql/lib/semmle/python/internal/ConceptsShared.qll b/python/ql/lib/semmle/python/internal/ConceptsShared.qll deleted file mode 100644 index 1b13e4ebb17e..000000000000 --- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Provides Concepts which are shared across languages. - * - * Each language has a language specific `Concepts.qll` file that can import the - * shared concepts from this file. A language can either re-export the concept directly, - * or can add additional member-predicates that are needed for that language. - * - * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from - * each language, but we will maintain a discipline of moving those concepts to - * `ConceptsShared.qll` ASAP. - */ - -private import ConceptsImports - -/** - * Provides models for cryptographic concepts. - * - * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into - * consideration for the `isWeak` member predicate. So RSA is always considered - * secure, although using a low number of bits will actually make it insecure. We plan - * to improve our libraries in the future to more precisely capture this aspect. - */ -module Cryptography { - class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - - class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - - class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - - class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; - - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `CryptographicOperation::Range` instead. - */ - class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } - - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - DataFlow::Node getInitialization() { result = super.getInitialization() } - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - BlockMode getBlockMode() { result = super.getBlockMode() } - } - - /** Provides classes for modeling new applications of a cryptographic algorithms. */ - module CryptographicOperation { - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `CryptographicOperation` instead. - */ - abstract class Range extends DataFlow::Node { - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - abstract DataFlow::Node getInitialization(); - - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - abstract CryptographicAlgorithm getAlgorithm(); - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - abstract DataFlow::Node getAnInput(); - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - abstract BlockMode getBlockMode(); - } - } - - /** - * A cryptographic block cipher mode of operation. This can be used to encrypt - * data of arbitrary length using a block encryption algorithm. - */ - class BlockMode extends string { - BlockMode() { - this = - [ - "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP", - "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final - "EAX" // https://en.wikipedia.org/wiki/EAX_mode - ] - } - - /** Holds if this block mode is considered to be insecure. */ - predicate isWeak() { this = "ECB" } - - /** Holds if the given string appears to match this block mode. */ - bindingset[s] - predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") } - } -} - -/** Provides classes for modeling HTTP-related APIs. */ -module Http { - /** Provides classes for modeling HTTP clients. */ - module Client { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `Http::Client::Request::Range` instead. - */ - class Request extends DataFlow::Node instanceof Request::Range { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - DataFlow::Node getAUrlPart() { result = super.getAUrlPart() } - - /** Gets a string that identifies the framework used for this request. */ - string getFramework() { result = super.getFramework() } - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ) { - super.disablesCertificateValidation(disablingNode, argumentOrigin) - } - } - - /** Provides a class for modeling new HTTP requests. */ - module Request { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `Http::Client::Request` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - abstract DataFlow::Node getAUrlPart(); - - /** Gets a string that identifies the framework used for this request. */ - abstract string getFramework(); - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - abstract predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ); - } - } - } -} diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll deleted file mode 100644 index ede88ebf8149..000000000000 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ /dev/null @@ -1,188 +0,0 @@ -/** - * INTERNAL: Do not use. - * - * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. - * Such that we can share this logic across our CodeQL analysis of different languages. - * - * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. - */ - -/** - * A classification of different kinds of sensitive data: - * - * - secret: generic secret or trusted data; - * - id: a user name or other account information; - * - password: a password or authorization key; - * - certificate: a certificate. - * - private: private data such as credit card numbers - * - * While classifications are represented as strings, this should not be relied upon. - * Instead, use the predicates in `SensitiveDataClassification::` to work with - * classifications. - */ -class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } -} - -/** - * Provides predicates to select the different kinds of sensitive data we support. - */ -module SensitiveDataClassification { - /** Gets the classification for secret or trusted data. */ - SensitiveDataClassification secret() { result = "secret" } - - /** Gets the classification for user names or other account information. */ - SensitiveDataClassification id() { result = "id" } - - /** Gets the classification for passwords or authorization keys. */ - SensitiveDataClassification password() { result = "password" } - - /** Gets the classification for certificates. */ - SensitiveDataClassification certificate() { result = "certificate" } - - /** Gets the classification for private data. */ - SensitiveDataClassification private() { result = "private" } -} - -/** - * INTERNAL: Do not use. - * - * Provides heuristics for identifying names related to sensitive information. - */ -module HeuristicNames { - /** - * Gets a regular expression that identifies strings that may indicate the presence of secret - * or trusted data. - */ - string maybeSecret() { result = "(?is).*((?; /** * A data-flow node that constructs a SQL statement. @@ -682,7 +686,7 @@ module Http { /** Provides classes for modeling HTTP clients. */ module Client { - import codeql.ruby.internal.ConceptsShared::Http::Client as SC + import ConceptsShared::Http::Client as SC /** * A method call that makes an outgoing HTTP request. @@ -1041,7 +1045,7 @@ module Cryptography { // modify that part of the shared concept... which means we have to explicitly // re-export everything else. // Using SC shorthand for "Shared Cryptography" - import codeql.ruby.internal.ConceptsShared::Cryptography as SC + import ConceptsShared::Cryptography as SC class CryptographicAlgorithm = SC::CryptographicAlgorithm; diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll index 122202c63b78..a034ad43f023 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll @@ -183,8 +183,7 @@ module ActiveResource { CollectionSource getCollection() { result = collection } } - private class ModelClassMethodCallAsHttpRequest extends Http::Client::Request::Range, - ModelClassMethodCall + private class ModelClassMethodCallAsHttpRequest extends Http::Client::Request::Range instanceof ModelClassMethodCall { ModelClassMethodCallAsHttpRequest() { this.getMethodName() = ["all", "build", "create", "create!", "find", "first", "last"] @@ -195,20 +194,19 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - this.getModelClass().disablesCertificateValidation(disablingNode) and + super.getModelClass().disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } override DataFlow::Node getAUrlPart() { - result = this.getModelClass().getASiteAssignment().getAUrlPart() + result = super.getModelClass().getASiteAssignment().getAUrlPart() } override DataFlow::Node getResponseBody() { result = this } } - private class ModelInstanceMethodCallAsHttpRequest extends Http::Client::Request::Range, - ModelInstanceMethodCall + private class ModelInstanceMethodCallAsHttpRequest extends Http::Client::Request::Range instanceof ModelInstanceMethodCall { ModelInstanceMethodCallAsHttpRequest() { this.getMethodName() = @@ -223,13 +221,13 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - this.getModelClass().disablesCertificateValidation(disablingNode) and + super.getModelClass().disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } override DataFlow::Node getAUrlPart() { - result = this.getModelClass().getASiteAssignment().getAUrlPart() + result = super.getModelClass().getASiteAssignment().getAUrlPart() } override DataFlow::Node getResponseBody() { result = this } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll index adf7384183e1..e2ba1eb48fec 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll @@ -23,7 +23,7 @@ private import codeql.ruby.DataFlow * TODO: pipelining, streaming responses * https://github.com/excon/excon/blob/master/README.md */ -class ExconHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class ExconHttpRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; API::Node connectionNode; DataFlow::Node connectionUse; @@ -54,9 +54,9 @@ class ExconHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode // For one-off requests, the URL is in the first argument of the request method call. // For connection re-use, the URL is split between the first argument of the `new` call // and the `path` keyword argument of the request method call. - result = this.getArgument(0) and not result.asExpr().getExpr() instanceof Pair + result = super.getArgument(0) and not result.asExpr().getExpr() instanceof Pair or - result = this.getKeywordArgument("path") + result = super.getKeywordArgument("path") or result = connectionUse.(DataFlow::CallNode).getArgument(0) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll index 834180a7ee49..961732da0d08 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll @@ -22,7 +22,7 @@ private import codeql.ruby.DataFlow * connection.get("/").body * ``` */ -class FaradayHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class FaradayHttpRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; API::Node connectionNode; DataFlow::Node connectionUse; @@ -47,7 +47,7 @@ class FaradayHttpRequest extends Http::Client::Request::Range, DataFlow::CallNod override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") } override DataFlow::Node getAUrlPart() { - result = this.getArgument(0) or + result = super.getArgument(0) or result = connectionUse.(DataFlow::CallNode).getArgument(0) or result = connectionUse.(DataFlow::CallNode).getKeywordArgument("url") } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll index c766ef96f23a..3fcd9b36703b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll @@ -14,7 +14,7 @@ private import codeql.ruby.DataFlow * HTTPClient.get_content("http://example.com") * ``` */ -class HttpClientRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class HttpClientRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; API::Node connectionNode; string method; @@ -34,7 +34,7 @@ class HttpClientRequest extends Http::Client::Request::Range, DataFlow::CallNode ] } - override DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + override DataFlow::Node getAUrlPart() { result = super.getArgument(0) } override DataFlow::Node getResponseBody() { // The `get_content` and `post_content` methods return the response body as diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll index e9f94f771f1a..fd0838c3f975 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll @@ -23,7 +23,7 @@ private import codeql.ruby.DataFlow * MyClass.new("http://example.com") * ``` */ -class HttpartyRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class HttpartyRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; HttpartyRequest() { @@ -33,7 +33,7 @@ class HttpartyRequest extends Http::Client::Request::Range, DataFlow::CallNode { .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) } - override DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + override DataFlow::Node getAUrlPart() { result = super.getArgument(0) } override DataFlow::Node getResponseBody() { // If HTTParty can recognise the response type, it will parse and return it @@ -49,7 +49,7 @@ class HttpartyRequest extends Http::Client::Request::Range, DataFlow::CallNode { /** Gets the value that controls certificate validation, if any. */ DataFlow::Node getCertificateValidationControllingValue() { - result = this.getKeywordArgumentIncludeHashArgument(["verify", "verify_peer"]) + result = super.getKeywordArgumentIncludeHashArgument(["verify", "verify_peer"]) } cached diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll index e09917ae21ab..3a0b484e5465 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll @@ -18,7 +18,7 @@ private import codeql.ruby.DataFlow * response = req.get("/") * ``` */ -class NetHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class NetHttpRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { private DataFlow::CallNode request; private API::Node requestNode; private boolean returnsResponseBody; diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll index 8ccb744f84e1..38d6aced09f9 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll @@ -18,7 +18,7 @@ private import codeql.ruby.frameworks.Core * URI.parse("http://example.com").open.read * ``` */ -class OpenUriRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class OpenUriRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; OpenUriRequest() { @@ -30,7 +30,7 @@ class OpenUriRequest extends Http::Client::Request::Range, DataFlow::CallNode { this = requestNode.asSource() } - override DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + override DataFlow::Node getAUrlPart() { result = super.getArgument(0) } override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall(["read", "readlines"]) @@ -38,7 +38,7 @@ class OpenUriRequest extends Http::Client::Request::Range, DataFlow::CallNode { /** Gets the value that controls certificate validation, if any. */ DataFlow::Node getCertificateValidationControllingValue() { - result = this.getKeywordArgumentIncludeHashArgument("ssl_verify_mode") + result = super.getKeywordArgumentIncludeHashArgument("ssl_verify_mode") } cached @@ -60,11 +60,10 @@ class OpenUriRequest extends Http::Client::Request::Range, DataFlow::CallNode { * Kernel.open("http://example.com").read * ``` */ -class OpenUriKernelOpenRequest extends Http::Client::Request::Range, DataFlow::CallNode instanceof KernelMethodCall -{ +class OpenUriKernelOpenRequest extends Http::Client::Request::Range instanceof KernelMethodCall { OpenUriKernelOpenRequest() { this.getMethodName() = "open" } - override DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + override DataFlow::Node getAUrlPart() { result = super.getArgument(0) } override DataFlow::CallNode getResponseBody() { result.asExpr().getExpr().(MethodCall).getMethodName() in ["read", "readlines"] and @@ -73,14 +72,14 @@ class OpenUriKernelOpenRequest extends Http::Client::Request::Range, DataFlow::C /** Gets the value that controls certificate validation, if any. */ DataFlow::Node getCertificateValidationControllingValue() { - result = this.getKeywordArgument("ssl_verify_mode") + result = super.getKeywordArgument("ssl_verify_mode") or // using a hashliteral exists( DataFlow::LocalSourceNode optionsNode, CfgNodes::ExprNodes::PairCfgNode p, DataFlow::Node key | // can't flow to argument 0, since that's the URL - optionsNode.flowsTo(this.getArgument(any(int i | i > 0))) and + optionsNode.flowsTo(super.getArgument(any(int i | i > 0))) and p = optionsNode.asExpr().(CfgNodes::ExprNodes::HashLiteralCfgNode).getAKeyValuePair() and key.asExpr() = p.getKey() and key.getALocalSource().asExpr().getConstantValue().isStringlikeValue("ssl_verify_mode") and diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll index cac94f7166f2..268233c27def 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll @@ -16,7 +16,7 @@ private import codeql.ruby.DataFlow * RestClient::Request.execute(url: "http://example.com").body * ``` */ -class RestClientHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class RestClientHttpRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; API::Node connectionNode; @@ -37,9 +37,9 @@ class RestClientHttpRequest extends Http::Client::Request::Range, DataFlow::Call } override DataFlow::Node getAUrlPart() { - result = this.getKeywordArgument("url") + result = super.getKeywordArgument("url") or - result = this.getArgument(0) and + result = super.getArgument(0) and // this rules out the alternative above not result.asExpr().getExpr() instanceof Pair } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll index 2eae03a77481..19d3db23ece6 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll @@ -14,7 +14,7 @@ private import codeql.ruby.DataFlow * Typhoeus.get("http://example.com").body * ``` */ -class TyphoeusHttpRequest extends Http::Client::Request::Range, DataFlow::CallNode { +class TyphoeusHttpRequest extends Http::Client::Request::Range instanceof DataFlow::CallNode { API::Node requestNode; boolean directResponse; @@ -31,7 +31,7 @@ class TyphoeusHttpRequest extends Http::Client::Request::Range, DataFlow::CallNo ) } - override DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + override DataFlow::Node getAUrlPart() { result = super.getArgument(0) } override DataFlow::Node getResponseBody() { directResponse = true and @@ -43,7 +43,7 @@ class TyphoeusHttpRequest extends Http::Client::Request::Range, DataFlow::CallNo /** Gets the value that controls certificate validation, if any. */ DataFlow::Node getCertificateValidationControllingValue() { - result = this.getKeywordArgumentIncludeHashArgument("ssl_verifypeer") + result = super.getKeywordArgumentIncludeHashArgument("ssl_verifypeer") } cached diff --git a/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll b/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll deleted file mode 100644 index 478fffe56aef..000000000000 --- a/ruby/ql/lib/codeql/ruby/internal/ConceptsImports.qll +++ /dev/null @@ -1,7 +0,0 @@ -/** - * This file contains imports required for the Ruby version of `ConceptsShared.qll`. - * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages. - */ - -import codeql.ruby.DataFlow -import codeql.ruby.security.CryptoAlgorithms as CryptoAlgorithms diff --git a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll b/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll deleted file mode 100644 index 1b13e4ebb17e..000000000000 --- a/ruby/ql/lib/codeql/ruby/internal/ConceptsShared.qll +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Provides Concepts which are shared across languages. - * - * Each language has a language specific `Concepts.qll` file that can import the - * shared concepts from this file. A language can either re-export the concept directly, - * or can add additional member-predicates that are needed for that language. - * - * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from - * each language, but we will maintain a discipline of moving those concepts to - * `ConceptsShared.qll` ASAP. - */ - -private import ConceptsImports - -/** - * Provides models for cryptographic concepts. - * - * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into - * consideration for the `isWeak` member predicate. So RSA is always considered - * secure, although using a low number of bits will actually make it insecure. We plan - * to improve our libraries in the future to more precisely capture this aspect. - */ -module Cryptography { - class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - - class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - - class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - - class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; - - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `CryptographicOperation::Range` instead. - */ - class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } - - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - DataFlow::Node getInitialization() { result = super.getInitialization() } - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - BlockMode getBlockMode() { result = super.getBlockMode() } - } - - /** Provides classes for modeling new applications of a cryptographic algorithms. */ - module CryptographicOperation { - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `CryptographicOperation` instead. - */ - abstract class Range extends DataFlow::Node { - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - abstract DataFlow::Node getInitialization(); - - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - abstract CryptographicAlgorithm getAlgorithm(); - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - abstract DataFlow::Node getAnInput(); - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - abstract BlockMode getBlockMode(); - } - } - - /** - * A cryptographic block cipher mode of operation. This can be used to encrypt - * data of arbitrary length using a block encryption algorithm. - */ - class BlockMode extends string { - BlockMode() { - this = - [ - "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP", - "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final - "EAX" // https://en.wikipedia.org/wiki/EAX_mode - ] - } - - /** Holds if this block mode is considered to be insecure. */ - predicate isWeak() { this = "ECB" } - - /** Holds if the given string appears to match this block mode. */ - bindingset[s] - predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") } - } -} - -/** Provides classes for modeling HTTP-related APIs. */ -module Http { - /** Provides classes for modeling HTTP clients. */ - module Client { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `Http::Client::Request::Range` instead. - */ - class Request extends DataFlow::Node instanceof Request::Range { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - DataFlow::Node getAUrlPart() { result = super.getAUrlPart() } - - /** Gets a string that identifies the framework used for this request. */ - string getFramework() { result = super.getFramework() } - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ) { - super.disablesCertificateValidation(disablingNode, argumentOrigin) - } - } - - /** Provides a class for modeling new HTTP requests. */ - module Request { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `Http::Client::Request` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - abstract DataFlow::Node getAUrlPart(); - - /** Gets a string that identifies the framework used for this request. */ - abstract string getFramework(); - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - abstract predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ); - } - } - } -} diff --git a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll index 7176c666c573..13a03a3bd888 100644 --- a/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll +++ b/ruby/ql/lib/codeql/ruby/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. - */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll index 26f6d996f144..40b3ac036cc1 100644 --- a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll +++ b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll @@ -3,7 +3,7 @@ * an underlying OpenSSL or LibreSSL C library. */ -private import internal.CryptoAlgorithmNames +private import codeql.concepts.internal.CryptoAlgorithmNames private import codeql.ruby.Concepts private import codeql.ruby.DataFlow private import codeql.ruby.ApiGraphs @@ -544,8 +544,7 @@ private class CipherNode extends DataFlow::Node { } /** An operation using the OpenSSL library that uses a cipher. */ -private class CipherOperation extends Cryptography::CryptographicOperation::Range, - DataFlow::CallNode +private class CipherOperation extends Cryptography::CryptographicOperation::Range instanceof DataFlow::CallNode { private CipherNode cipherNode; @@ -564,8 +563,8 @@ private class CipherOperation extends Cryptography::CryptographicOperation::Rang } override DataFlow::Node getAnInput() { - this.getMethodName() = "update" and - result = this.getArgument(0) + super.getMethodName() = "update" and + result = super.getArgument(0) } override Cryptography::BlockMode getBlockMode() { diff --git a/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll b/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll index 34beb33604b7..e0dc68e7136e 100644 --- a/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll +++ b/ruby/ql/lib/codeql/ruby/security/SensitiveActions.qll @@ -11,7 +11,7 @@ private import codeql.ruby.AST private import codeql.ruby.DataFlow -import codeql.ruby.security.internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import HeuristicNames private import codeql.ruby.CFG diff --git a/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll b/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll index 3338bbf65f70..f2867fa14bfe 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll @@ -8,8 +8,8 @@ private import codeql.ruby.AST private import codeql.ruby.DataFlow private import codeql.ruby.TaintTracking::TaintTracking private import codeql.ruby.dataflow.RemoteFlowSources -private import SensitiveDataHeuristics::HeuristicNames -private import SensitiveDataHeuristics +private import codeql.concepts.internal.SensitiveDataHeuristics::HeuristicNames +private import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.ruby.CFG private import codeql.ruby.dataflow.SSA diff --git a/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll b/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/ruby/ql/lib/codeql/ruby/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. - */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. - */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index 1c2ec3fc09b4..befe2c9dcdc1 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ dbscheme: ruby.dbscheme upgrades: upgrades library: true dependencies: + codeql/concepts: ${workspace} codeql/controlflow: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} diff --git a/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql b/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql index c4c42532d899..f19368e3656f 100644 --- a/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql +++ b/ruby/ql/test/library-tests/security/CryptoAlgorithms.ql @@ -1,5 +1,5 @@ import codeql.ruby.AST -import codeql.ruby.security.CryptoAlgorithms +import codeql.concepts.CryptoAlgorithms query predicate weakHashingAlgorithms(HashingAlgorithm ha) { ha.isWeak() } diff --git a/rust/ql/lib/codeql/rust/Concepts.qll b/rust/ql/lib/codeql/rust/Concepts.qll index 73dd629cef88..20247e088752 100644 --- a/rust/ql/lib/codeql/rust/Concepts.qll +++ b/rust/ql/lib/codeql/rust/Concepts.qll @@ -5,11 +5,16 @@ */ private import codeql.rust.dataflow.DataFlow +private import codeql.rust.dataflow.internal.DataFlowImpl +private import codeql.Locations private import codeql.threatmodels.ThreatModels private import codeql.rust.Frameworks private import codeql.rust.dataflow.FlowSource private import codeql.rust.controlflow.ControlFlowGraph as Cfg private import codeql.rust.controlflow.CfgNodes as CfgNodes +private import codeql.concepts.ConceptsShared + +private module ConceptsShared = ConceptsMake; /** * A data flow source for a specific threat-model. @@ -302,7 +307,7 @@ module SqlSanitization { * Provides models for cryptographic things. */ module Cryptography { - private import codeql.rust.internal.ConceptsShared::Cryptography as SC + import ConceptsShared::Cryptography as SC final class CryptographicOperation = SC::CryptographicOperation; diff --git a/rust/ql/lib/codeql/rust/internal/ConceptsImports.qll b/rust/ql/lib/codeql/rust/internal/ConceptsImports.qll deleted file mode 100644 index 341f3ade509f..000000000000 --- a/rust/ql/lib/codeql/rust/internal/ConceptsImports.qll +++ /dev/null @@ -1,7 +0,0 @@ -/** - * This file contains imports required for the Rust version of `ConceptsShared.qll`. - * Since they are language-specific, they can't be placed directly in that file, as it is shared between languages. - */ - -import codeql.rust.dataflow.DataFlow::DataFlow as DataFlow -import codeql.rust.security.CryptoAlgorithms as CryptoAlgorithms diff --git a/rust/ql/lib/codeql/rust/internal/ConceptsShared.qll b/rust/ql/lib/codeql/rust/internal/ConceptsShared.qll deleted file mode 100644 index 1b13e4ebb17e..000000000000 --- a/rust/ql/lib/codeql/rust/internal/ConceptsShared.qll +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Provides Concepts which are shared across languages. - * - * Each language has a language specific `Concepts.qll` file that can import the - * shared concepts from this file. A language can either re-export the concept directly, - * or can add additional member-predicates that are needed for that language. - * - * Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from - * each language, but we will maintain a discipline of moving those concepts to - * `ConceptsShared.qll` ASAP. - */ - -private import ConceptsImports - -/** - * Provides models for cryptographic concepts. - * - * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into - * consideration for the `isWeak` member predicate. So RSA is always considered - * secure, although using a low number of bits will actually make it insecure. We plan - * to improve our libraries in the future to more precisely capture this aspect. - */ -module Cryptography { - class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm; - - class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm; - - class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm; - - class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm; - - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `CryptographicOperation::Range` instead. - */ - class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } - - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - DataFlow::Node getInitialization() { result = super.getInitialization() } - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - BlockMode getBlockMode() { result = super.getBlockMode() } - } - - /** Provides classes for modeling new applications of a cryptographic algorithms. */ - module CryptographicOperation { - /** - * A data flow node that is an application of a cryptographic algorithm. For example, - * encryption, decryption, signature-validation. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `CryptographicOperation` instead. - */ - abstract class Range extends DataFlow::Node { - /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ - abstract DataFlow::Node getInitialization(); - - /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - abstract CryptographicAlgorithm getAlgorithm(); - - /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - abstract DataFlow::Node getAnInput(); - - /** - * Gets the block mode used to perform this cryptographic operation. - * - * This predicate is only expected to have a result if two conditions hold: - * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and - * 2. The algorithm used is a block cipher (not a stream cipher). - * - * If either of these conditions do not hold, then this predicate should have no result. - */ - abstract BlockMode getBlockMode(); - } - } - - /** - * A cryptographic block cipher mode of operation. This can be used to encrypt - * data of arbitrary length using a block encryption algorithm. - */ - class BlockMode extends string { - BlockMode() { - this = - [ - "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP", - "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final - "EAX" // https://en.wikipedia.org/wiki/EAX_mode - ] - } - - /** Holds if this block mode is considered to be insecure. */ - predicate isWeak() { this = "ECB" } - - /** Holds if the given string appears to match this block mode. */ - bindingset[s] - predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") } - } -} - -/** Provides classes for modeling HTTP-related APIs. */ -module Http { - /** Provides classes for modeling HTTP clients. */ - module Client { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to refine existing API models. If you want to model new APIs, - * extend `Http::Client::Request::Range` instead. - */ - class Request extends DataFlow::Node instanceof Request::Range { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - DataFlow::Node getAUrlPart() { result = super.getAUrlPart() } - - /** Gets a string that identifies the framework used for this request. */ - string getFramework() { result = super.getFramework() } - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ) { - super.disablesCertificateValidation(disablingNode, argumentOrigin) - } - } - - /** Provides a class for modeling new HTTP requests. */ - module Request { - /** - * A data flow node that makes an outgoing HTTP request. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `Http::Client::Request` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets a data flow node that contributes to the URL of the request. - * Depending on the framework, a request may have multiple nodes which contribute to the URL. - */ - abstract DataFlow::Node getAUrlPart(); - - /** Gets a string that identifies the framework used for this request. */ - abstract string getFramework(); - - /** - * Holds if this request is made using a mode that disables SSL/TLS - * certificate validation, where `disablingNode` represents the point at - * which the validation was disabled, and `argumentOrigin` represents the origin - * of the argument that disabled the validation (which could be the same node as - * `disablingNode`). - */ - abstract predicate disablesCertificateValidation( - DataFlow::Node disablingNode, DataFlow::Node argumentOrigin - ); - } - } - } -} diff --git a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll index 7176c666c573..13a03a3bd888 100644 --- a/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll +++ b/rust/ql/lib/codeql/rust/security/CryptoAlgorithms.qll @@ -1,117 +1,5 @@ /** * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ -private import internal.CryptoAlgorithmNames - -/** - * A cryptographic algorithm. - */ -private newtype TCryptographicAlgorithm = - MkHashingAlgorithm(string name, boolean isWeak) { - isStrongHashingAlgorithm(name) and isWeak = false - or - isWeakHashingAlgorithm(name) and isWeak = true - } or - MkEncryptionAlgorithm(string name, boolean isWeak) { - isStrongEncryptionAlgorithm(name) and isWeak = false - or - isWeakEncryptionAlgorithm(name) and isWeak = true - } or - MkPasswordHashingAlgorithm(string name, boolean isWeak) { - isStrongPasswordHashingAlgorithm(name) and isWeak = false - or - isWeakPasswordHashingAlgorithm(name) and isWeak = true - } - -/** - * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. - * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. - * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. - */ -bindingset[name] -private CryptographicAlgorithm getBestAlgorithmForName(string name) { - result = - max(CryptographicAlgorithm algorithm | - algorithm.getName() = - [ - name.toUpperCase(), // the full name - name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces - name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores - ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces - | - algorithm order by algorithm.getName().length() - ) -} - -/** - * A cryptographic algorithm. - */ -abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { - /** Gets a textual representation of this element. */ - string toString() { result = this.getName() } - - /** - * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). - */ - abstract string getName(); - - /** - * Holds if the name of this algorithm is the most specific match for `name`. - * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. - */ - bindingset[name] - predicate matchesName(string name) { this = getBestAlgorithmForName(name) } - - /** - * Holds if this algorithm is weak. - */ - abstract predicate isWeak(); -} - -/** - * A hashing algorithm such as `MD5` or `SHA512`. - */ -class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} - -/** - * An encryption algorithm such as `DES` or `AES512`. - */ -class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } - - /** Holds if this algorithm is a stream cipher. */ - predicate isStreamCipher() { isStreamCipher(name) } -} - -/** - * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. - */ -class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { - string name; - boolean isWeak; - - PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } - - override string getName() { result = name } - - override predicate isWeak() { isWeak = true } -} +import codeql.concepts.CryptoAlgorithms diff --git a/rust/ql/lib/codeql/rust/security/SensitiveData.qll b/rust/ql/lib/codeql/rust/security/SensitiveData.qll index bf3364abdb6b..4e6ba21a2d28 100644 --- a/rust/ql/lib/codeql/rust/security/SensitiveData.qll +++ b/rust/ql/lib/codeql/rust/security/SensitiveData.qll @@ -6,7 +6,7 @@ */ import rust -import internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.rust.dataflow.DataFlow /** diff --git a/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll b/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll deleted file mode 100644 index 8bb63d97876a..000000000000 --- a/rust/ql/lib/codeql/rust/security/internal/CryptoAlgorithmNames.qll +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Names of cryptographic algorithms, separated into strong and weak variants. - * - * The names are normalized: upper-case, no spaces, dashes or underscores. - * - * The names are inspired by the names used in real world crypto libraries. - * - * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). - */ - -/** - * Holds if `name` corresponds to a strong hashing algorithm. - */ -predicate isStrongHashingAlgorithm(string name) { - name = - [ - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2 - // and https://www.blake2.net/ - "BLAKE2", "BLAKE2B", "BLAKE2S", - // see https://github.com/BLAKE3-team/BLAKE3 - "BLAKE3", - // - "DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2", - "SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128 - "SHAKE128", "SHAKE256", - // see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3 - "SM3", - // see https://security.stackexchange.com/a/216297 - "WHIRLPOOL", - ] -} - -/** - * Holds if `name` corresponds to a weak hashing algorithm. - */ -predicate isWeakHashingAlgorithm(string name) { - name = - [ - "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160", - "RIPEMD320", "SHA0", "SHA1" - ] -} - -/** - * Holds if `name` corresponds to a strong encryption algorithm. - */ -predicate isStrongEncryptionAlgorithm(string name) { - name = - [ - "AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512", - "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192", - "CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89", - "IDEA", "RABBIT", "RSA", "SEED", "SM4" - ] -} - -/** - * Holds if `name` corresponds to a weak encryption algorithm. - */ -predicate isWeakEncryptionAlgorithm(string name) { - name = - [ - "DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", - "ARCFOUR", "ARC5", "RC5" - ] -} - -/** - * Holds if `name` corresponds to a strong password hashing algorithm. - */ -predicate isStrongPasswordHashingAlgorithm(string name) { - name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"] -} - -/** - * Holds if `name` corresponds to a weak password hashing algorithm. - */ -predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" } - -/** - * Holds if `name` corresponds to a stream cipher. - */ -predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] } diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll deleted file mode 100644 index ede88ebf8149..000000000000 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ /dev/null @@ -1,188 +0,0 @@ -/** - * INTERNAL: Do not use. - * - * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. - * Such that we can share this logic across our CodeQL analysis of different languages. - * - * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. - */ - -/** - * A classification of different kinds of sensitive data: - * - * - secret: generic secret or trusted data; - * - id: a user name or other account information; - * - password: a password or authorization key; - * - certificate: a certificate. - * - private: private data such as credit card numbers - * - * While classifications are represented as strings, this should not be relied upon. - * Instead, use the predicates in `SensitiveDataClassification::` to work with - * classifications. - */ -class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } -} - -/** - * Provides predicates to select the different kinds of sensitive data we support. - */ -module SensitiveDataClassification { - /** Gets the classification for secret or trusted data. */ - SensitiveDataClassification secret() { result = "secret" } - - /** Gets the classification for user names or other account information. */ - SensitiveDataClassification id() { result = "id" } - - /** Gets the classification for passwords or authorization keys. */ - SensitiveDataClassification password() { result = "password" } - - /** Gets the classification for certificates. */ - SensitiveDataClassification certificate() { result = "certificate" } - - /** Gets the classification for private data. */ - SensitiveDataClassification private() { result = "private" } -} - -/** - * INTERNAL: Do not use. - * - * Provides heuristics for identifying names related to sensitive information. - */ -module HeuristicNames { - /** - * Gets a regular expression that identifies strings that may indicate the presence of secret - * or trusted data. - */ - string maybeSecret() { result = "(?is).*((? DataFlowLang> { + final private class DataFlowNode = DataFlowLang::Node; + + /** + * Provides models for cryptographic concepts. + * + * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into + * consideration for the `isWeak` member predicate. So RSA is always considered + * secure, although using a low number of bits will actually make it insecure. We plan + * to improve our libraries in the future to more precisely capture this aspect. + */ + module Cryptography { + class CryptographicAlgorithm = CA::CryptographicAlgorithm; + + class EncryptionAlgorithm = CA::EncryptionAlgorithm; + + class HashingAlgorithm = CA::HashingAlgorithm; + + class PasswordHashingAlgorithm = CA::PasswordHashingAlgorithm; + + /** + * A data flow node that is an application of a cryptographic algorithm. For example, + * encryption, decryption, signature-validation. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `CryptographicOperation::Range` instead. + */ + class CryptographicOperation extends DataFlowNode instanceof CryptographicOperation::Range { + /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ + CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } + + /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ + DataFlowNode getInitialization() { result = super.getInitialization() } + + /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ + DataFlowNode getAnInput() { result = super.getAnInput() } + + /** + * Gets the block mode used to perform this cryptographic operation. + * + * This predicate is only expected to have a result if two conditions hold: + * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and + * 2. The algorithm used is a block cipher (not a stream cipher). + * + * If either of these conditions do not hold, then this predicate should have no result. + */ + BlockMode getBlockMode() { result = super.getBlockMode() } + } + + /** Provides classes for modeling new applications of a cryptographic algorithms. */ + module CryptographicOperation { + /** + * A data flow node that is an application of a cryptographic algorithm. For example, + * encryption, decryption, signature-validation. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `CryptographicOperation` instead. + */ + abstract class Range extends DataFlowNode { + /** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */ + abstract DataFlowNode getInitialization(); + + /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ + abstract CryptographicAlgorithm getAlgorithm(); + + /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ + abstract DataFlowNode getAnInput(); + + /** + * Gets the block mode used to perform this cryptographic operation. + * + * This predicate is only expected to have a result if two conditions hold: + * 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and + * 2. The algorithm used is a block cipher (not a stream cipher). + * + * If either of these conditions do not hold, then this predicate should have no result. + */ + abstract BlockMode getBlockMode(); + } + } + + /** + * A cryptographic block cipher mode of operation. This can be used to encrypt + * data of arbitrary length using a block encryption algorithm. + */ + class BlockMode extends string { + BlockMode() { + this = + [ + "ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP", + "XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final + "EAX" // https://en.wikipedia.org/wiki/EAX_mode + ] + } + + /** Holds if this block mode is considered to be insecure. */ + predicate isWeak() { this = "ECB" } + + /** Holds if the given string appears to match this block mode. */ + bindingset[s] + predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") } + } + } + + /** Provides classes for modeling HTTP-related APIs. */ + module Http { + /** Provides classes for modeling HTTP clients. */ + module Client { + /** + * A data flow node that makes an outgoing HTTP request. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `Http::Client::Request::Range` instead. + */ + class Request extends DataFlowNode instanceof Request::Range { + /** + * Gets a data flow node that contributes to the URL of the request. + * Depending on the framework, a request may have multiple nodes which contribute to the URL. + */ + DataFlowNode getAUrlPart() { result = super.getAUrlPart() } + + /** Gets a string that identifies the framework used for this request. */ + string getFramework() { result = super.getFramework() } + + /** + * Holds if this request is made using a mode that disables SSL/TLS + * certificate validation, where `disablingNode` represents the point at + * which the validation was disabled, and `argumentOrigin` represents the origin + * of the argument that disabled the validation (which could be the same node as + * `disablingNode`). + */ + predicate disablesCertificateValidation( + DataFlowNode disablingNode, DataFlowNode argumentOrigin + ) { + super.disablesCertificateValidation(disablingNode, argumentOrigin) + } + } + + /** Provides a class for modeling new HTTP requests. */ + module Request { + /** + * A data flow node that makes an outgoing HTTP request. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `Http::Client::Request` instead. + */ + abstract class Range extends DataFlowNode { + /** + * Gets a data flow node that contributes to the URL of the request. + * Depending on the framework, a request may have multiple nodes which contribute to the URL. + */ + abstract DataFlowNode getAUrlPart(); + + /** Gets a string that identifies the framework used for this request. */ + abstract string getFramework(); + + /** + * Holds if this request is made using a mode that disables SSL/TLS + * certificate validation, where `disablingNode` represents the point at + * which the validation was disabled, and `argumentOrigin` represents the origin + * of the argument that disabled the validation (which could be the same node as + * `disablingNode`). + */ + abstract predicate disablesCertificateValidation( + DataFlowNode disablingNode, DataFlowNode argumentOrigin + ); + } + } + } + } +} diff --git a/shared/concepts/codeql/concepts/CryptoAlgorithms.qll b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll new file mode 100644 index 000000000000..23a45027cf14 --- /dev/null +++ b/shared/concepts/codeql/concepts/CryptoAlgorithms.qll @@ -0,0 +1,119 @@ +/** + * Provides classes modeling cryptographic algorithms, separated into strong and weak variants. + * + * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). + */ +overlay[local?] +module; + +private import codeql.concepts.internal.CryptoAlgorithmNames + +/** + * A cryptographic algorithm. + */ +private newtype TCryptographicAlgorithm = + MkHashingAlgorithm(string name, boolean isWeak) { + isStrongHashingAlgorithm(name) and isWeak = false + or + isWeakHashingAlgorithm(name) and isWeak = true + } or + MkEncryptionAlgorithm(string name, boolean isWeak) { + isStrongEncryptionAlgorithm(name) and isWeak = false + or + isWeakEncryptionAlgorithm(name) and isWeak = true + } or + MkPasswordHashingAlgorithm(string name, boolean isWeak) { + isStrongPasswordHashingAlgorithm(name) and isWeak = false + or + isWeakPasswordHashingAlgorithm(name) and isWeak = true + } + +/** + * Gets the most specific `CryptographicAlgorithm` that matches the given `name`. + * A matching algorithm is one where the name of the algorithm matches the start of name, with allowances made for different name formats. + * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match. + */ +bindingset[name] +private CryptographicAlgorithm getBestAlgorithmForName(string name) { + result = + max(CryptographicAlgorithm algorithm | + algorithm.getName() = + [ + name.toUpperCase(), // the full name + name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces + name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores + ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces + | + algorithm order by algorithm.getName().length() + ) +} + +/** + * A cryptographic algorithm. + */ +abstract class CryptographicAlgorithm extends TCryptographicAlgorithm { + /** Gets a textual representation of this element. */ + string toString() { result = this.getName() } + + /** + * Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores). + */ + abstract string getName(); + + /** + * Holds if the name of this algorithm is the most specific match for `name`. + * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc. + */ + bindingset[name] + predicate matchesName(string name) { this = getBestAlgorithmForName(name) } + + /** + * Holds if this algorithm is weak. + */ + abstract predicate isWeak(); +} + +/** + * A hashing algorithm such as `MD5` or `SHA512`. + */ +class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } +} + +/** + * An encryption algorithm such as `DES` or `AES512`. + */ +class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } + + /** Holds if this algorithm is a stream cipher. */ + predicate isStreamCipher() { isStreamCipher(name) } +} + +/** + * A password hashing algorithm such as `PBKDF2` or `SCRYPT`. + */ +class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm { + string name; + boolean isWeak; + + PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) } + + override string getName() { result = name } + + override predicate isWeak() { isWeak = true } +} diff --git a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll similarity index 99% rename from python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll rename to shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll index 8bb63d97876a..efcd870c724a 100644 --- a/python/ql/lib/semmle/python/concepts/internal/CryptoAlgorithmNames.qll +++ b/shared/concepts/codeql/concepts/internal/CryptoAlgorithmNames.qll @@ -7,6 +7,8 @@ * * The classification into strong and weak are based on Wikipedia, OWASP and Google (2021). */ +overlay[local?] +module; /** * Holds if `name` corresponds to a strong hashing algorithm. diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll similarity index 99% rename from ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll rename to shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll index ede88ebf8149..c50d1341c778 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/shared/concepts/codeql/concepts/internal/SensitiveDataHeuristics.qll @@ -6,6 +6,8 @@ * * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. */ +overlay[local?] +module; /** * A classification of different kinds of sensitive data: diff --git a/shared/concepts/qlpack.yml b/shared/concepts/qlpack.yml new file mode 100644 index 000000000000..2b8a40fc79a0 --- /dev/null +++ b/shared/concepts/qlpack.yml @@ -0,0 +1,8 @@ +name: codeql/concepts +version: 0.0.0-dev +groups: shared +library: true +dependencies: + codeql/dataflow: ${workspace} + codeql/util: ${workspace} +warnOnImplicitThis: true diff --git a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll index 044b2a054d7f..d78f9b405327 100644 --- a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll +++ b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll @@ -5,7 +5,7 @@ */ import swift -import internal.SensitiveDataHeuristics +import codeql.concepts.internal.SensitiveDataHeuristics private import codeql.swift.dataflow.DataFlow private import codeql.swift.dataflow.ExternalFlow diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll deleted file mode 100644 index ede88ebf8149..000000000000 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ /dev/null @@ -1,188 +0,0 @@ -/** - * INTERNAL: Do not use. - * - * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. - * Such that we can share this logic across our CodeQL analysis of different languages. - * - * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. - */ - -/** - * A classification of different kinds of sensitive data: - * - * - secret: generic secret or trusted data; - * - id: a user name or other account information; - * - password: a password or authorization key; - * - certificate: a certificate. - * - private: private data such as credit card numbers - * - * While classifications are represented as strings, this should not be relied upon. - * Instead, use the predicates in `SensitiveDataClassification::` to work with - * classifications. - */ -class SensitiveDataClassification extends string { - SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } -} - -/** - * Provides predicates to select the different kinds of sensitive data we support. - */ -module SensitiveDataClassification { - /** Gets the classification for secret or trusted data. */ - SensitiveDataClassification secret() { result = "secret" } - - /** Gets the classification for user names or other account information. */ - SensitiveDataClassification id() { result = "id" } - - /** Gets the classification for passwords or authorization keys. */ - SensitiveDataClassification password() { result = "password" } - - /** Gets the classification for certificates. */ - SensitiveDataClassification certificate() { result = "certificate" } - - /** Gets the classification for private data. */ - SensitiveDataClassification private() { result = "private" } -} - -/** - * INTERNAL: Do not use. - * - * Provides heuristics for identifying names related to sensitive information. - */ -module HeuristicNames { - /** - * Gets a regular expression that identifies strings that may indicate the presence of secret - * or trusted data. - */ - string maybeSecret() { result = "(?is).*((?