Skip to content

Commit c9ece5c

Browse files
committed
SQL: Stronger read-only mode, using sqlparse
1 parent 5727a27 commit c9ece5c

File tree

9 files changed

+174
-15
lines changed

9 files changed

+174
-15
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
- MCP documentation: Add reference to medium-sized llms.txt context file
1212
- Boilerplate: Added software tests and CI configuration
1313
- Documentation: Added development sandbox section
14+
- SQL: Stronger read-only mode, using `sqlparse`

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,12 @@ LLMs can still hallucinate and give incorrect answers.
4040
* What is the storage consumption of my tables, give it in a graph.
4141
* How can I format a timestamp column to '2019 Jan 21'.
4242

43-
# Data integrity
44-
We do not recommend letting the LLMs insert data or modify columns by itself, as such we tell the
45-
LLMs to only allow 'SELECT' statements in the `cratedb_sql` tool, you can jailbreak this against
46-
our recommendation.
43+
# Read-only access
44+
We do not recommend letting LLM-based agents insert or modify data by itself.
45+
As such, only `SELECT` statements are permitted and forwarded to the database.
46+
All other operations will raise a `ValueError` exception, unless the
47+
`CRATEDB_MCP_PERMIT_ALL_STATEMENTS` environment variable is set to a
48+
truthy value.
4749

4850
# Install
4951
```shell

cratedb_mcp/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import httpx
22
from mcp.server.fastmcp import FastMCP
33

4-
from .knowledge import DOCUMENTATION_INDEX, Queries
4+
from .knowledge import DOCUMENTATION_INDEX, Queries, sql_expression_permitted
55
from .settings import HTTP_URL
66

77
mcp = FastMCP("cratedb-mcp")
@@ -14,7 +14,7 @@ def query_cratedb(query: str) -> list[dict]:
1414
@mcp.tool(description="Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that"
1515
" modify data, columns or are otherwise deemed un-safe are rejected.")
1616
def query_sql(query: str):
17-
if 'select' not in query.lower():
17+
if not sql_expression_permitted(query):
1818
raise ValueError('Only queries that have a SELECT statement are allowed.')
1919
return query_cratedb(query)
2020

cratedb_mcp/knowledge.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
# ruff: noqa: E501
2+
import logging
3+
4+
import sqlparse
5+
6+
from cratedb_mcp.settings import PERMIT_ALL_STATEMENTS
7+
8+
logger = logging.getLogger(__name__)
9+
210

311
class Queries:
412
TABLES_METADATA = """
@@ -100,3 +108,48 @@ class Queries:
100108
"link": "https://raw.githubusercontent.com/crate/cratedb-guide/9ab661997d7704ecbb63af9c3ee33535957e24e6/docs/performance/optimization.rst"
101109
}
102110
]
111+
112+
113+
def sql_expression_permitted(expression: str) -> bool:
114+
"""
115+
Validate the SQL expression, only permit read queries by default.
116+
117+
When the `CRATEDB_MCP_PERMIT_ALL_STATEMENTS` environment variable is set,
118+
allow all types of statements.
119+
120+
FIXME: Revisit implementation, it might be too naive or weak.
121+
Issue: https://github.com/crate/cratedb-mcp/issues/10
122+
Question: Does SQLAlchemy provide a solid read-only mode, or any other library?
123+
"""
124+
outcome = _sql_expression_permitted(expression)
125+
if outcome is True:
126+
logger.info(f"Permitted SQL expression: {expression}")
127+
else:
128+
logger.warning(f"Denied SQL expression: {expression}")
129+
return outcome
130+
131+
132+
def _sql_expression_permitted(expression: str) -> bool:
133+
134+
if not expression:
135+
return False
136+
137+
if PERMIT_ALL_STATEMENTS:
138+
return True
139+
140+
# Parse the SQL statement.
141+
parsed = sqlparse.parse(expression.strip())
142+
if not parsed:
143+
return False
144+
145+
# Check for multiple statements (potential SQL injection).
146+
if len(parsed) > 1:
147+
return False
148+
149+
# Check if the expression is valid and if it's a SELECT statement,
150+
# also trying to consider `SELECT ... INTO ...` statements.
151+
operation = parsed[0].get_type().upper()
152+
tokens = [str(item).upper() for item in parsed[0]]
153+
if operation != 'SELECT' or (operation == 'SELECT' and 'INTO' in tokens):
154+
return False
155+
return True

cratedb_mcp/settings.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
11
import os
2+
import warnings
3+
4+
from attr.converters import to_bool
25

36
HTTP_URL: str = os.getenv("CRATEDB_MCP_HTTP_URL", "http://localhost:4200")
7+
8+
# Whether to permit all statements. By default, only SELECT operations are permitted.
9+
PERMIT_ALL_STATEMENTS: bool = to_bool(os.getenv("CRATEDB_MCP_PERMIT_ALL_STATEMENTS", "false"))
10+
11+
# TODO: Refactor into code which is not on the module level. Use OOM early.
12+
if PERMIT_ALL_STATEMENTS: # pragma: no cover
13+
warnings.warn("All types of SQL statements are permitted. This means the LLM "
14+
"agent can write and modify the connected database", category=UserWarning, stacklevel=2)

docs/backlog.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# CrateDB MCP backlog
2+
3+
## Iteration +1
4+
- Docs: HTTP caching
5+
- Docs: Load documentation index from custom outline file
6+
7+
## Done
8+
- SQL: Stronger read-only mode

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ dynamic = [
2020
"version",
2121
]
2222
dependencies = [
23+
"attrs",
2324
"mcp[cli]>=1.5.0",
25+
"sqlparse<0.6",
2426
]
2527
optional-dependencies.develop = [
2628
"mypy<1.16",

tests/test_knowledge.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries
1+
import cratedb_mcp
2+
from cratedb_mcp.knowledge import DOCUMENTATION_INDEX, Queries, sql_expression_permitted
23

34

45
def test_documentation_index():
@@ -16,3 +17,83 @@ def test_queries():
1617
assert "sys.health" in Queries.TABLES_METADATA
1718
assert "WITH partitions_health" in Queries.TABLES_METADATA
1819
assert "LEFT JOIN" in Queries.TABLES_METADATA
20+
21+
22+
def test_sql_expression_select_permitted():
23+
"""Regular SQL SELECT statements are permitted"""
24+
assert sql_expression_permitted("SELECT 42") is True
25+
26+
27+
def test_sql_expression_insert_allowed(mocker):
28+
"""When explicitly allowed, permit any kind of statement"""
29+
mocker.patch.object(cratedb_mcp.knowledge, "PERMIT_ALL_STATEMENTS", True)
30+
assert sql_expression_permitted("INSERT INTO foobar") is True
31+
32+
33+
def test_sql_expression_select_multiple_rejected():
34+
"""Multiple SQL statements are rejected"""
35+
assert sql_expression_permitted("SELECT 42; SELECT 42;") is False
36+
37+
38+
def test_sql_expression_create_rejected():
39+
"""DDL statements are rejected"""
40+
assert sql_expression_permitted("CREATE TABLE foobar AS SELECT 42") is False
41+
42+
43+
def test_sql_expression_insert_rejected():
44+
"""DML statements are rejected"""
45+
assert sql_expression_permitted("INSERT INTO foobar") is False
46+
47+
48+
def test_sql_expression_select_into_rejected():
49+
"""SELECT+DML statements are rejected"""
50+
assert sql_expression_permitted("SELECT * INTO foobar FROM bazqux") is False
51+
52+
53+
def test_sql_expression_empty_rejected():
54+
"""Empty statements are rejected"""
55+
assert sql_expression_permitted("") is False
56+
57+
58+
def test_sql_expression_almost_empty_rejected():
59+
"""Quasi-empty statements are rejected"""
60+
assert sql_expression_permitted(" ") is False
61+
62+
63+
def test_sql_expression_none_rejected():
64+
"""Void statements are rejected"""
65+
assert sql_expression_permitted(None) is False
66+
67+
68+
def test_sql_expression_multiple_statements_rejected():
69+
assert sql_expression_permitted("SELECT 42; INSERT INTO foo VALUES (1)") is False
70+
71+
72+
def test_sql_expression_with_comments_rejected():
73+
assert sql_expression_permitted(
74+
"/* Sneaky comment */ INSERT /* another comment */ INTO foo VALUES (1)") is False
75+
76+
77+
def test_sql_expression_update_rejected():
78+
"""UPDATE statements are rejected"""
79+
assert sql_expression_permitted("UPDATE foobar SET column = 'value'") is False
80+
81+
82+
def test_sql_expression_delete_rejected():
83+
"""DELETE statements are rejected"""
84+
assert sql_expression_permitted("DELETE FROM foobar") is False
85+
86+
87+
def test_sql_expression_truncate_rejected():
88+
"""TRUNCATE statements are rejected"""
89+
assert sql_expression_permitted("TRUNCATE TABLE foobar") is False
90+
91+
92+
def test_sql_expression_drop_rejected():
93+
"""DROP statements are rejected"""
94+
assert sql_expression_permitted("DROP TABLE foobar") is False
95+
96+
97+
def test_sql_expression_alter_rejected():
98+
"""ALTER statements are rejected"""
99+
assert sql_expression_permitted("ALTER TABLE foobar ADD COLUMN newcol INTEGER") is False

tests/test_mcp.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,20 @@ def test_fetch_docs_permitted():
2424
assert "initcap" in response
2525

2626

27-
def test_query_sql_forbidden():
27+
def test_query_sql_permitted():
28+
assert query_sql("SELECT 42")["rows"] == [[42]]
29+
30+
31+
def test_query_sql_forbidden_easy():
2832
with pytest.raises(ValueError) as ex:
2933
assert "RelationUnknown" in str(query_sql("INSERT INTO foobar (id) VALUES (42) RETURNING id"))
3034
assert ex.match("Only queries that have a SELECT statement are allowed")
3135

3236

33-
def test_query_sql_permitted():
34-
assert query_sql("SELECT 42")["rows"] == [[42]]
35-
36-
37-
def test_query_sql_permitted_exploit():
38-
# FIXME: Read-only protection must become stronger.
39-
query_sql("INSERT INTO foobar (operation) VALUES ('select')")
37+
def test_query_sql_forbidden_sneak_value():
38+
with pytest.raises(ValueError) as ex:
39+
query_sql("INSERT INTO foobar (operation) VALUES ('select')")
40+
assert ex.match("Only queries that have a SELECT statement are allowed")
4041

4142

4243
def test_get_table_metadata():

0 commit comments

Comments
 (0)