diff --git a/python/ql/lib/semmle/python/frameworks/Pandas.qll b/python/ql/lib/semmle/python/frameworks/Pandas.qll
index eb6c3c44409c..d4c94f3e8386 100644
--- a/python/ql/lib/semmle/python/frameworks/Pandas.qll
+++ b/python/ql/lib/semmle/python/frameworks/Pandas.qll
@@ -151,4 +151,17 @@ private module Pandas {
 
     override DataFlow::Node getCode() { result = this.getParameter(0, "expr").asSink() }
   }
+
+  /**
+   * A call to `pandas.read_sql` or `pandas.read_sql_query`, which executes a raw SQL
+   * query against a database. The query is argument 0 or the keyword argument `sql`.
+   * See https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html
+   */
+  class ReadSqlCall extends SqlExecution::Range, DataFlow::CallCfgNode {
+    ReadSqlCall() {
+      this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall()
+    }
+
+    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
+  }
 }
diff --git a/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md b/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md
new file mode 100644
index 000000000000..a230dcc63ec3
--- /dev/null
+++ b/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Added SQL injection models from the `pandas` PyPI package.
diff --git a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py
index a524fa214459..e9a368f8c1ba 100644
--- a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py
+++ b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py
@@ -1,5 +1,5 @@
 import pandas as pd
-
+import sqlite3
 
 df = pd.DataFrame({'temp_c': [17.0, 25.0]}, index=['Portland', 'Berkeley'])
 df.sample().query("query") # $getCode="query"
@@ -55,11 +55,13 @@
 df.query("query") # $getCode="query"
 df.eval("query") # $getCode="query"
 
-df = pd.read_sql_query("filepath", 'postgres:///db_name')
+connection = sqlite3.connect("pets.db") # passed as `con` to the read_sql* calls below
+df = pd.read_sql_query("sql query", connection) # $getSql="sql query"
 df.query("query") # $getCode="query"
 df.eval("query") # $getCode="query"
 
-df = pd.read_sql("filepath", 'postgres:///db_name')
+df = pd.read_sql("sql query", connection) # $getSql="sql query"
+df = pd.read_sql(sql="sql query", con=connection) # $getSql="sql query"
 df.query("query") # $getCode="query"
 df.eval("query") # $getCode="query"
 