Skip to content

Support for groovy static analysis for groovy scripts #14844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import org.apache.pinot.core.util.ListenerConfigUtil;
import org.apache.pinot.query.mailbox.MailboxService;
import org.apache.pinot.query.service.dispatch.QueryDispatcher;
import org.apache.pinot.segment.local.function.GroovyFunctionEvaluator;
import org.apache.pinot.spi.accounting.ThreadResourceUsageProvider;
import org.apache.pinot.spi.cursors.ResponseStoreService;
import org.apache.pinot.spi.env.PinotConfiguration;
Expand Down Expand Up @@ -478,6 +479,11 @@ public void start()
_participantHelixManager.addPreConnectCallback(
() -> _brokerMetrics.addMeteredGlobalValue(BrokerMeter.HELIX_ZOOKEEPER_RECONNECTS, 1L));

// Initializing Groovy execution security
GroovyFunctionEvaluator.configureGroovySecurity(
_brokerConf.getProperty(CommonConstants.Groovy.GROOVY_QUERY_STATIC_ANALYZER_CONFIG,
_brokerConf.getProperty(CommonConstants.Groovy.GROOVY_ALL_STATIC_ANALYZER_CONFIG)));

// Register the service status handler
registerServiceStatusHandler();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
import org.apache.pinot.core.transport.ServerInstance;
import org.apache.pinot.core.util.GapfillUtils;
import org.apache.pinot.query.parser.utils.ParserUtils;
import org.apache.pinot.segment.local.function.GroovyFunctionEvaluator;
import org.apache.pinot.spi.auth.AuthorizationResult;
import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.QueryConfig;
Expand Down Expand Up @@ -515,9 +516,7 @@ protected BrokerResponse doHandleRequest(long requestId, String query, SqlNodeAn
}

HandlerContext handlerContext = getHandlerContext(offlineTableConfig, realtimeTableConfig);
if (handlerContext._disableGroovy) {
rejectGroovyQuery(serverPinotQuery);
}
validateGroovyScript(serverPinotQuery, handlerContext._disableGroovy);
if (handlerContext._useApproximateFunction) {
handleApproximateFunctionOverride(serverPinotQuery);
}
Expand Down Expand Up @@ -1429,45 +1428,59 @@ private static class HandlerContext {
* Verifies that no groovy is present in the PinotQuery when disabled.
*/
@VisibleForTesting
static void rejectGroovyQuery(PinotQuery pinotQuery) {
static void validateGroovyScript(PinotQuery pinotQuery, boolean disableGroovy) {
List<Expression> selectList = pinotQuery.getSelectList();
for (Expression expression : selectList) {
rejectGroovyQuery(expression);
validateGroovyScript(expression, disableGroovy);
}
List<Expression> orderByList = pinotQuery.getOrderByList();
if (orderByList != null) {
for (Expression expression : orderByList) {
// NOTE: Order-by is always a Function with the ordering of the Expression
rejectGroovyQuery(expression.getFunctionCall().getOperands().get(0));
validateGroovyScript(expression.getFunctionCall().getOperands().get(0), disableGroovy);
}
}
Expression havingExpression = pinotQuery.getHavingExpression();
if (havingExpression != null) {
rejectGroovyQuery(havingExpression);
validateGroovyScript(havingExpression, disableGroovy);
}
Expression filterExpression = pinotQuery.getFilterExpression();
if (filterExpression != null) {
rejectGroovyQuery(filterExpression);
validateGroovyScript(filterExpression, disableGroovy);
}
List<Expression> groupByList = pinotQuery.getGroupByList();
if (groupByList != null) {
for (Expression expression : groupByList) {
rejectGroovyQuery(expression);
validateGroovyScript(expression, disableGroovy);
}
}
}

private static void rejectGroovyQuery(Expression expression) {
private static void validateGroovyScript(Expression expression, boolean disableGroovy) {
Function function = expression.getFunctionCall();
if (function == null) {
return;
}
if (function.getOperator().equals("groovy")) {
throw new BadQueryRequestException("Groovy transform functions are disabled for queries");
if (disableGroovy) {
throw new BadQueryRequestException("Groovy transform functions are disabled for queries");
} else {
groovySecureAnalysis(function);
}
}
for (Expression operandExpression : function.getOperands()) {
rejectGroovyQuery(operandExpression);
validateGroovyScript(operandExpression, disableGroovy);
}
}

private static void groovySecureAnalysis(Function function) {
List<Expression> operands = function.getOperands();
if (operands == null || operands.size() < 2) {
throw new BadQueryRequestException("Groovy transform function must have at least 2 argument");
}
// second argument in the groovy function is groovy script
String script = operands.get(1).getLiteral().getStringValue();
GroovyFunctionEvaluator.parseGroovyScript(String.format("groovy({%s})", script));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,19 @@

package org.apache.pinot.broker.requesthandler;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.segment.local.function.GroovyFunctionEvaluator;
import org.apache.pinot.segment.local.function.GroovyStaticAnalyzerConfig;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.testng.Assert;
import org.testng.annotations.Test;

import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;


public class QueryValidationTest {

Expand Down Expand Up @@ -93,24 +99,64 @@ public void testNonExistingColumns() {
}

@Test
public void testRejectGroovyQuery() {
testRejectGroovyQuery(
public void testValidateGroovyQuery() {
testValidateGroovyQuery(
"SELECT groovy('{\"returnType\":\"INT\",\"isSingleValue\":true}', 'arg0 + arg1', colA, colB) FROM foo", true);
testRejectGroovyQuery(
testValidateGroovyQuery(
"SELECT GROOVY('{\"returnType\":\"INT\",\"isSingleValue\":true}', 'arg0 + arg1', colA, colB) FROM foo", true);
testRejectGroovyQuery(
testValidateGroovyQuery(
"SELECT groo_vy('{\"returnType\":\"INT\",\"isSingleValue\":true}', 'arg0 + arg1', colA, colB) FROM foo", true);
testRejectGroovyQuery(
testValidateGroovyQuery(
"SELECT foo FROM bar WHERE GROOVY('{\"returnType\":\"STRING\",\"isSingleValue\":true}', 'arg0 + arg1', colA,"
+ " colB) = 'foobarval'", true);
testRejectGroovyQuery(
testValidateGroovyQuery(
"SELECT COUNT(colA) FROM bar GROUP BY GROOVY('{\"returnType\":\"STRING\",\"isSingleValue\":true}', "
+ "'arg0 + arg1', colA, colB)", true);
testRejectGroovyQuery(
testValidateGroovyQuery(
"SELECT foo FROM bar HAVING GROOVY('{\"returnType\":\"STRING\",\"isSingleValue\":true}', 'arg0 + arg1', colA,"
+ " colB) = 'foobarval'", true);

testRejectGroovyQuery("SELECT foo FROM bar", false);
testValidateGroovyQuery("SELECT foo FROM bar", false);
}

@Test
public void testGroovyScripts()
throws JsonProcessingException {
// setup secure groovy config
GroovyFunctionEvaluator.setGroovyStaticAnalyzerConfig(GroovyStaticAnalyzerConfig.createDefault());

String inValidGroovyQuery = "SELECT groovy('{\"returnType\":\"INT\",\"isSingleValue\":true}') FROM foo";
runUnsupportedGroovy(inValidGroovyQuery, "Groovy transform function must have at least 2 argument");

String groovyInvalidMethodInvokeQuery =
"SELECT groovy('{\"returnType\":\"STRING\",\"isSingleValue\":true}', 'return [\"bash\", \"-c\", \"echo Hello,"
+ " World!\"].execute().text') FROM foo";
runUnsupportedGroovy(groovyInvalidMethodInvokeQuery, "Expression [MethodCallExpression] is not allowed");

String groovyInvalidImportsQuery =
"SELECT groovy( '{\"returnType\":\"INT\",\"isSingleValue\":true}', 'def args = [\"QuickStart\", \"-type\", "
+ "\"REALTIME\"] as String[]; org.apache.pinot.tools.admin.PinotAdministrator.main(args); 2') FROM foo";
runUnsupportedGroovy(groovyInvalidImportsQuery, "Indirect import checks prevents usage of expression");

String groovyInOrderByClause =
"SELECT colA, colB FROM foo ORDER BY groovy('{\"returnType\":\"STRING\",\"isSingleValue\":true}', 'return "
+ "[\"bash\", \"-c\", \"echo Hello, World!\"].execute().text') DESC";
runUnsupportedGroovy(groovyInOrderByClause, "Expression [MethodCallExpression] is not allowed");

String groovyInHavingClause =
"SELECT colA, SUM(colB) AS totalB, groovy('{\"returnType\":\"DOUBLE\",\"isSingleValue\":true}', 'arg0 / "
+ "arg1', SUM(colB), COUNT(*)) AS avgB FROM foo GROUP BY colA HAVING groovy('{\"returnType\":\"BOOLEAN\","
+ "\"isSingleValue\":true}', 'System.metaClass.methods.each { method -> if (method.name.md5() == "
+ "\"f24f62eeb789199b9b2e467df3b1876b\") {method.invoke(System, 10)} }', SUM(colB))";
runUnsupportedGroovy(groovyInHavingClause, "Indirect import checks prevents usage of expression");

String groovyInWhereClause =
"SELECT colA, colB FROM foo WHERE groovy('{\"returnType\":\"BOOLEAN\",\"isSingleValue\":true}', 'System.exit"
+ "(10)', colA)";
runUnsupportedGroovy(groovyInWhereClause, "Indirect import checks prevents usage of expression");

// Reset groovy config for rest of the testing
GroovyFunctionEvaluator.setGroovyStaticAnalyzerConfig(null);
}

@Test
Expand All @@ -121,24 +167,34 @@ public void testReplicaGroupToQueryInvalidQuery() {
() -> BaseSingleStageBrokerRequestHandler.validateRequest(pinotQuery, 10));
}

private void testRejectGroovyQuery(String query, boolean queryContainsGroovy) {
private void testValidateGroovyQuery(String query, boolean queryContainsGroovy) {
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);

try {
BaseSingleStageBrokerRequestHandler.rejectGroovyQuery(pinotQuery);
BaseSingleStageBrokerRequestHandler.validateGroovyScript(pinotQuery, queryContainsGroovy);
if (queryContainsGroovy) {
Assert.fail("Query should have failed since groovy was found in query: " + pinotQuery);
fail("Query should have failed since groovy was found in query: " + pinotQuery);
}
} catch (Exception e) {
Assert.assertEquals(e.getMessage(), "Groovy transform functions are disabled for queries");
}
}

private static void runUnsupportedGroovy(String query, String errorMsg) {
try {
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
BaseSingleStageBrokerRequestHandler.validateGroovyScript(pinotQuery, false);
fail("Query should have failed since malicious groovy was found in query");
} catch (Exception e) {
assertTrue(e.getMessage().contains(errorMsg));
}
}

private void testUnsupportedQuery(String query, String errorMessage) {
try {
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
BaseSingleStageBrokerRequestHandler.validateRequest(pinotQuery, 1000);
Assert.fail("Query should have failed");
fail("Query should have failed");
} catch (Exception e) {
Assert.assertEquals(e.getMessage(), errorMessage);
}
Expand All @@ -149,7 +205,7 @@ private void testNonExistingColumns(String rawTableName, boolean isCaseInsensiti
try {
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
BaseSingleStageBrokerRequestHandler.updateColumnNames(rawTableName, pinotQuery, isCaseInsensitive, columnNameMap);
Assert.fail("Query should have failed");
fail("Query should have failed");
} catch (Exception e) {
Assert.assertEquals(errorMessage, e.getMessage());
}
Expand All @@ -161,7 +217,7 @@ private void testExistingColumns(String rawTableName, boolean isCaseInsensitive,
PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
BaseSingleStageBrokerRequestHandler.updateColumnNames(rawTableName, pinotQuery, isCaseInsensitive, columnNameMap);
} catch (Exception e) {
Assert.fail("Query should have succeeded");
fail("Query should have succeeded");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ public enum ControllerMeter implements AbstractMetrics.Meter {
IDEAL_STATE_UPDATE_RETRY("IdealStateUpdateRetry", false),
IDEAL_STATE_UPDATE_SUCCESS("IdealStateUpdateSuccess", false);


private final String _brokerMeterName;
private final String _unit;
private final boolean _global;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
import org.apache.pinot.core.segment.processing.lifecycle.PinotSegmentLifecycleEventListenerManager;
import org.apache.pinot.core.transport.ListenerConfig;
import org.apache.pinot.core.util.ListenerConfigUtil;
import org.apache.pinot.segment.local.function.GroovyFunctionEvaluator;
import org.apache.pinot.segment.local.utils.TableConfigUtils;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.crypt.PinotCrypterFactory;
Expand Down Expand Up @@ -387,7 +388,8 @@ public ControllerConf getConfig() {
}

@Override
public void start() {
public void start()
throws Exception {
LOGGER.info("Starting Pinot controller in mode: {}. (Version: {})", _controllerMode.name(), PinotVersion.VERSION);
LOGGER.info("Controller configs: {}", new PinotAppConfigs(getConfig()).toJSONString());
long startTimeMs = System.currentTimeMillis();
Expand All @@ -412,6 +414,11 @@ public void start() {
break;
}

// Initializing Groovy execution security
GroovyFunctionEvaluator.configureGroovySecurity(
_config.getProperty(CommonConstants.Groovy.GROOVY_INGESTION_STATIC_ANALYZER_CONFIG,
_config.getProperty(CommonConstants.Groovy.GROOVY_ALL_STATIC_ANALYZER_CONFIG)));

ServiceStatus.setServiceStatusCallback(_helixParticipantInstanceId,
new ServiceStatus.MultipleCallbackServiceStatusCallback(_serviceStatusCallbackList));
_controllerMetrics.addTimedValue(ControllerTimer.STARTUP_SUCCESS_DURATION_MS,
Expand Down
Loading
Loading