Skip to content

Commit f99eca6

Browse files
committed
SQL: Attempt to use cratedb-sqlparse for implementing read-only mode
1 parent b94242f commit f99eca6

File tree

8 files changed

+246
-152
lines changed

8 files changed

+246
-152
lines changed

cratedb_mcp/__main__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import httpx
33
from mcp.server.fastmcp import FastMCP
44

5-
from .knowledge import DOCUMENTATION_INDEX, Queries, sql_expression_permitted
5+
from .knowledge import DOCUMENTATION_INDEX, Queries
66
from .settings import DOCS_CACHE_TTL, HTTP_URL
7+
from .util.sql import sql_is_permitted
78

89
# Configure Hishel, an httpx client with caching.
910
# Define one hour of caching time.
@@ -22,7 +23,7 @@ def query_cratedb(query: str) -> list[dict]:
2223
@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that"
2324
" modify data, columns or are otherwise deemed un-safe are rejected.")
2425
def query_sql(query: str):
25-
if not sql_expression_permitted(query):
26+
if not sql_is_permitted(query):
2627
raise ValueError('Only queries that have a SELECT statement are allowed.')
2728
return query_cratedb(query)
2829

cratedb_mcp/knowledge.py

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
11
# ruff: noqa: E501
2-
import logging
3-
4-
import sqlparse
5-
6-
from cratedb_mcp.settings import PERMIT_ALL_STATEMENTS
7-
8-
logger = logging.getLogger(__name__)
9-
102

113
class Queries:
124
TABLES_METADATA = """
@@ -108,48 +100,3 @@ class Queries:
108100
"link": "https://raw.githubusercontent.com/crate/cratedb-guide/9ab661997d7704ecbb63af9c3ee33535957e24e6/docs/performance/optimization.rst"
109101
}
110102
]
111-
112-
113-
def sql_expression_permitted(expression: str) -> bool:
114-
"""
115-
Validate the SQL expression, only permit read queries by default.
116-
117-
When the `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` environment variable is set,
118-
allow all types of statements.
119-
120-
FIXME: Revisit implementation, it might be too naive or weak.
121-
Issue: https://github.com/crate/cratedb-mcp/issues/10
122-
Question: Does SQLAlchemy provide a solid read-only mode, or any other library?
123-
"""
124-
outcome = _sql_expression_permitted(expression)
125-
if outcome is True:
126-
logger.info(f"Permitted SQL expression: {expression}")
127-
else:
128-
logger.warning(f"Denied SQL expression: {expression}")
129-
return outcome
130-
131-
132-
def _sql_expression_permitted(expression: str) -> bool:
133-
134-
if not expression:
135-
return False
136-
137-
if PERMIT_ALL_STATEMENTS:
138-
return True
139-
140-
# Parse the SQL statement.
141-
parsed = sqlparse.parse(expression.strip())
142-
if not parsed:
143-
return False
144-
145-
# Check for multiple statements (potential SQL injection).
146-
if len(parsed) > 1:
147-
return False
148-
149-
# Check if the expression is valid and if it's a SELECT statement,
150-
# also trying to consider `SELECT ... INTO ...` statements.
151-
operation = parsed[0].get_type().upper()
152-
tokens = [str(item).upper() for item in parsed[0]]
153-
if operation != 'SELECT' or (operation == 'SELECT' and 'INTO' in tokens):
154-
return False
155-
return True

cratedb_mcp/util/__init__.py

Whitespace-only changes.

cratedb_mcp/util/sql.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import dataclasses
2+
import logging
3+
import typing as t
4+
5+
import cratedb_sqlparse
6+
import sqlparse
7+
8+
from cratedb_mcp.settings import PERMIT_ALL_STATEMENTS
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
def sql_is_permitted(expression: str) -> bool:
14+
"""
15+
Validate the SQL expression, only permit read queries by default.
16+
17+
When the `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` environment variable is set,
18+
allow all types of statements.
19+
20+
FIXME: Revisit implementation, it might be too naive or weak.
21+
Issue: https://github.com/crate/cratedb-mcp/issues/10
22+
Question: Does SQLAlchemy provide a solid read-only mode, or any other library?
23+
"""
24+
is_dql = SqlFilter(expression=expression, permit_all=PERMIT_ALL_STATEMENTS).is_dql
25+
if is_dql is True:
26+
logger.info(f"Permitted SQL expression: {expression}")
27+
else:
28+
logger.warning(f"Denied SQL expression: {expression}")
29+
return is_dql
30+
31+
32+
@dataclasses.dataclass
33+
class SqlFilter:
34+
"""
35+
Manage an SQL expression for filtering purposes.
36+
Here, most importantly: Permit read-only SQL SELECT DQL statements only.
37+
"""
38+
expression: str
39+
permit_all: bool = False
40+
41+
_parsed_cratedb: t.Any = dataclasses.field(init=False, default=None)
42+
_parsed_sqlparse: t.Any = dataclasses.field(init=False, default=None)
43+
44+
def __post_init__(self) -> None:
45+
if self.expression:
46+
self.expression = self.expression.strip()
47+
48+
def parse_cratedb(self):
49+
"""
50+
Parse expression using `cratedb-sqlparse` library.
51+
"""
52+
if self._parsed_cratedb is None:
53+
self._parsed_cratedb = cratedb_sqlparse.sqlparse(self.expression)
54+
return self._parsed_cratedb
55+
56+
def parse_sqlparse(self):
57+
"""
58+
Parse expression using traditional `sqlparse` library.
59+
"""
60+
if self._parsed_sqlparse is None:
61+
self._parsed_sqlparse = sqlparse.parse(self.expression)
62+
return self._parsed_sqlparse
63+
64+
@property
65+
def is_dql(self) -> bool:
66+
"""
67+
Whether the statement is a DQL statement, which effectively invokes read-only operations only.
68+
"""
69+
70+
if not self.expression:
71+
return False
72+
73+
if self.permit_all:
74+
return True
75+
76+
# Parse the SQL statement using `cratedb-sqlparse`.
77+
parsed = self.parse_cratedb()
78+
79+
# Reject multiple statements to prevent potential SQL injections.
80+
if len(parsed) > 1:
81+
return False
82+
83+
# Check if the expression is valid and if it's a DQL/SELECT statement,
84+
# also trying to consider `SELECT ... INTO ...` and evasive
85+
# `SELECT * FROM users; \uff1b DROP TABLE users` statements.
86+
return self.is_select and not self.is_camouflage
87+
88+
@property
89+
def is_select(self) -> bool:
90+
"""
91+
Whether the expression is an SQL SELECT statement.
92+
"""
93+
return self.operation == 'SELECT'
94+
95+
@property
96+
def operation(self) -> str:
97+
"""
98+
The SQL operation: SELECT, INSERT, UPDATE, DELETE, CREATE, etc.
99+
"""
100+
return self._parsed_cratedb[0].type.upper()
101+
102+
@property
103+
def is_camouflage(self) -> bool:
104+
"""
105+
Innocent-looking `SELECT` statements can evade filters.
106+
"""
107+
return self.is_select_into or self.is_evasive
108+
109+
@property
110+
def is_select_into(self) -> bool:
111+
"""
112+
Use traditional `sqlparse` for catching `SELECT ... INTO ...` statements.
113+
114+
Note: With `cratedb-sqlparse`, we can't find the `INTO` token at all?
115+
116+
Examples:
117+
118+
SELECT * INTO foobar FROM bazqux
119+
SELECT * FROM bazqux INTO foobar
120+
"""
121+
parsed = self.parse_sqlparse()
122+
tokens = [str(item).upper() for item in parsed[0]]
123+
return "INTO" in tokens
124+
125+
@property
126+
def is_evasive(self) -> bool:
127+
"""
128+
Use traditional `sqlparse` for catching evasive SQL statements.
129+
130+
A practice picked up from CodeRabbit was to reject multiple statements
131+
to prevent potential SQL injections. Is it a viable suggestion?
132+
133+
Note: This currently does not work with `cratedb-sqlparse`?
134+
135+
Examples:
136+
137+
SELECT * FROM users; \uff1b DROP TABLE users
138+
"""
139+
parsed = self.parse_sqlparse()
140+
return len(parsed) > 1

docs/backlog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## Iteration +1
44
- Docs: HTTP caching
55
- Docs: Load documentation index from custom outline file
6+
- SQL: Extract `SqlFilter` or `SqlGateway` functionality to `cratedb-sqlparse`
67

78
## Done
89
- SQL: Stronger read-only mode

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dynamic = [
2121
]
2222
dependencies = [
2323
"attrs",
24+
"cratedb-sqlparse==0.0.14",
2425
"hishel<0.2",
2526
"mcp[cli]>=1.5.0",
2627
"sqlparse<0.6",

tests/test_knowledge.py

Lines changed: 1 addition & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import cratedb_mcp
2-
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries, sql_expression_permitted
1+
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries
32

43

54
def test_documentation_index():
@@ -19,98 +18,3 @@ def test_queries():
1918
assert "LEFT JOIN" in Queries.TABLES_METADATA
2019

2120

22-
def test_sql_expression_select_permitted():
23-
"""Regular SQL SELECT statements are permitted"""
24-
assert sql_expression_permitted("SELECT 42") is True
25-
assert sql_expression_permitted(" SELECT 42") is True
26-
assert sql_expression_permitted("select 42") is True
27-
28-
29-
def test_sql_expression_select_rejected():
30-
"""Bogus SQL SELECT statements are rejected"""
31-
assert sql_expression_permitted(r"--\; select 42") is False
32-
33-
34-
def test_sql_expression_insert_allowed(mocker):
35-
"""When explicitly allowed, permit any kind of statement"""
36-
mocker.patch.object(cratedb_mcp.knowledge, "PERMIT_ALL_STATEMENTS", True)
37-
assert sql_expression_permitted("INSERT INTO foobar") is True
38-
39-
40-
def test_sql_expression_select_multiple_rejected():
41-
"""Multiple SQL statements are rejected"""
42-
assert sql_expression_permitted("SELECT 42; SELECT 42;") is False
43-
44-
45-
def test_sql_expression_create_rejected():
46-
"""DDL statements are rejected"""
47-
assert sql_expression_permitted("CREATE TABLE foobar AS SELECT 42") is False
48-
49-
50-
def test_sql_expression_insert_rejected():
51-
"""DML statements are rejected"""
52-
assert sql_expression_permitted("INSERT INTO foobar") is False
53-
54-
55-
def test_sql_expression_select_into_rejected():
56-
"""SELECT+DML statements are rejected"""
57-
assert sql_expression_permitted("SELECT * INTO foobar FROM bazqux") is False
58-
59-
60-
def test_sql_expression_empty_rejected():
61-
"""Empty statements are rejected"""
62-
assert sql_expression_permitted("") is False
63-
64-
65-
def test_sql_expression_almost_empty_rejected():
66-
"""Quasi-empty statements are rejected"""
67-
assert sql_expression_permitted(" ") is False
68-
69-
70-
def test_sql_expression_none_rejected():
71-
"""Void statements are rejected"""
72-
assert sql_expression_permitted(None) is False
73-
74-
75-
def test_sql_expression_multiple_statements_rejected():
76-
assert sql_expression_permitted("SELECT 42; INSERT INTO foo VALUES (1)") is False
77-
78-
79-
def test_sql_expression_with_comments_rejected():
80-
assert sql_expression_permitted(
81-
"/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)") is False
82-
83-
84-
def test_sql_expression_update_rejected():
85-
"""UPDATE statements are rejected"""
86-
assert sql_expression_permitted("UPDATE foobar SET column = 'value'") is False
87-
88-
89-
def test_sql_expression_delete_rejected():
90-
"""DELETE statements are rejected"""
91-
assert sql_expression_permitted("DELETE FROM foobar") is False
92-
93-
94-
def test_sql_expression_truncate_rejected():
95-
"""TRUNCATE statements are rejected"""
96-
assert sql_expression_permitted("TRUNCATE TABLE foobar") is False
97-
98-
99-
def test_sql_expression_drop_rejected():
100-
"""DROP statements are rejected"""
101-
assert sql_expression_permitted("DROP TABLE foobar") is False
102-
103-
104-
def test_sql_expression_alter_rejected():
105-
"""ALTER statements are rejected"""
106-
assert sql_expression_permitted("ALTER TABLE foobar ADD COLUMN newcol INTEGER") is False
107-
108-
109-
def test_sql_expression_case_manipulation_rejected():
110-
"""Statements with case manipulation to hide intent are rejected"""
111-
assert sql_expression_permitted("SeLeCt * FrOm users; DrOp TaBlE users") is False
112-
113-
114-
def test_sql_expression_unicode_evasion_rejected():
115-
"""Statements with unicode characters to evade filters are rejected"""
116-
assert sql_expression_permitted("SELECT * FROM users; \uFF1B DROP TABLE users") is False

0 commit comments

Comments
 (0)