-
Notifications
You must be signed in to change notification settings - Fork 144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improved patterns command with new algorithm #3263
base: main
Are you sure you want to change the base?
Changes from 1 commit
9d93991
6874a5f
dd9373f
c62b25b
9c6327a
bda53df
786dcee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.sql.ast.expression; | ||
|
||
import lombok.Getter; | ||
import lombok.RequiredArgsConstructor; | ||
|
||
@RequiredArgsConstructor | ||
public enum PatternMethod { | ||
SIMPLE("simple"), | ||
BRAIN("brain"); | ||
|
||
@Getter final String name; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.sql.ast.tree; | ||
|
||
import com.google.common.collect.ImmutableList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import lombok.AllArgsConstructor; | ||
import lombok.EqualsAndHashCode; | ||
import lombok.Getter; | ||
import lombok.RequiredArgsConstructor; | ||
import lombok.Setter; | ||
import lombok.ToString; | ||
import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
import org.opensearch.sql.ast.expression.Literal; | ||
import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
||
@Getter | ||
@Setter | ||
@ToString | ||
@EqualsAndHashCode(callSuper = false) | ||
@RequiredArgsConstructor | ||
@AllArgsConstructor | ||
public class Pattern extends UnresolvedPlan { | ||
|
||
private final UnresolvedExpression patternWindowFunction; | ||
|
||
private final Map<String, Literal> arguments; | ||
|
||
private UnresolvedPlan child; | ||
|
||
@Override | ||
public Pattern attach(UnresolvedPlan child) { | ||
this.child = child; | ||
return this; | ||
} | ||
|
||
@Override | ||
public List<UnresolvedPlan> getChild() { | ||
return ImmutableList.of(this.child); | ||
} | ||
|
||
@Override | ||
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { | ||
return nodeVisitor.visitPattern(this, context); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,8 +6,10 @@ | |
package org.opensearch.sql.expression.window; | ||
|
||
import static java.util.Collections.emptyList; | ||
import static org.opensearch.sql.data.type.ExprCoreType.STRING; | ||
|
||
import com.google.common.collect.ImmutableMap; | ||
import java.util.Collections; | ||
import java.util.function.Supplier; | ||
import lombok.experimental.UtilityClass; | ||
import org.opensearch.sql.expression.function.BuiltinFunctionName; | ||
|
@@ -16,6 +18,8 @@ | |
import org.opensearch.sql.expression.function.FunctionBuilder; | ||
import org.opensearch.sql.expression.function.FunctionName; | ||
import org.opensearch.sql.expression.function.FunctionSignature; | ||
import org.opensearch.sql.expression.window.patterns.BufferPatternWindowFunction; | ||
import org.opensearch.sql.expression.window.patterns.StreamPatternWindowFunction; | ||
import org.opensearch.sql.expression.window.ranking.DenseRankFunction; | ||
import org.opensearch.sql.expression.window.ranking.RankFunction; | ||
import org.opensearch.sql.expression.window.ranking.RankingWindowFunction; | ||
|
@@ -34,6 +38,8 @@ public void register(BuiltinFunctionRepository repository) { | |
repository.register(rowNumber()); | ||
repository.register(rank()); | ||
repository.register(denseRank()); | ||
repository.register(brain()); | ||
repository.register(simplePattern()); | ||
Comment on lines
+43
to
+44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking are these new algorithm really window function? If users specify |
||
} | ||
|
||
private DefaultFunctionResolver rowNumber() { | ||
|
@@ -48,6 +54,26 @@ private DefaultFunctionResolver denseRank() { | |
return rankingFunction(BuiltinFunctionName.DENSE_RANK.getName(), DenseRankFunction::new); | ||
} | ||
|
||
private DefaultFunctionResolver brain() { | ||
FunctionName functionName = BuiltinFunctionName.BRAIN.getName(); | ||
FunctionSignature functionSignature = | ||
new FunctionSignature(functionName, Collections.singletonList(STRING)); | ||
FunctionBuilder functionBuilder = | ||
(functionProperties, arguments) -> new BufferPatternWindowFunction(arguments); | ||
return new DefaultFunctionResolver( | ||
functionName, ImmutableMap.of(functionSignature, functionBuilder)); | ||
} | ||
|
||
private DefaultFunctionResolver simplePattern() { | ||
FunctionName functionName = BuiltinFunctionName.SIMPLE_PATTERN.getName(); | ||
FunctionSignature functionSignature = | ||
new FunctionSignature(functionName, Collections.singletonList(STRING)); | ||
FunctionBuilder functionBuilder = | ||
(functionProperties, arguments) -> new StreamPatternWindowFunction(arguments); | ||
return new DefaultFunctionResolver( | ||
functionName, ImmutableMap.of(functionSignature, functionBuilder)); | ||
} | ||
|
||
private DefaultFunctionResolver rankingFunction( | ||
FunctionName functionName, Supplier<RankingWindowFunction> constructor) { | ||
FunctionSignature functionSignature = new FunctionSignature(functionName, emptyList()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.sql.expression.window.frame; | ||
|
||
import com.google.common.collect.PeekingIterator; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import lombok.EqualsAndHashCode; | ||
import lombok.Getter; | ||
import lombok.ToString; | ||
import org.opensearch.sql.common.patterns.BrainLogParser; | ||
import org.opensearch.sql.data.model.ExprValue; | ||
import org.opensearch.sql.expression.Expression; | ||
import org.opensearch.sql.expression.window.WindowDefinition; | ||
|
||
@EqualsAndHashCode(callSuper = true) | ||
@ToString | ||
public class BufferPatternRowsWindowFrame extends PeerRowsWindowFrame { | ||
|
||
private final Expression sourceField; | ||
|
||
@Getter private final BrainLogParser logParser; | ||
|
||
private final List<List<String>> preprocessedMessages; | ||
|
||
public BufferPatternRowsWindowFrame( | ||
WindowDefinition windowDefinition, BrainLogParser logParser, Expression sourceField) { | ||
super(windowDefinition); | ||
this.logParser = logParser; | ||
this.sourceField = sourceField; | ||
this.preprocessedMessages = new ArrayList<>(); | ||
} | ||
|
||
@Override | ||
public void load(PeekingIterator<ExprValue> it) { | ||
if (hasNext()) { | ||
return; | ||
} | ||
|
||
loadAllRows(it); | ||
|
||
List<String> logMessages = | ||
peers.stream() | ||
.map( | ||
exprValue -> { | ||
ExprValue value = sourceField.valueOf(exprValue.bindingTuples()); | ||
return value.stringValue(); | ||
}) | ||
.toList(); | ||
this.preprocessedMessages.addAll(logParser.preprocessAllLogs(logMessages)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BufferPatternRowsWindowFrame should have it own spec, doet it means over all rows? @dai-chen WindowFrame definition and How to use WIndowFrame should be seperate, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For simplicity, I set the spec to empty I think we can add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think users are free to use pattern functions added with any window frame definition. |
||
} | ||
|
||
public List<String> currentPreprocessedMessage() { | ||
return this.preprocessedMessages.get(position); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just wonder if pattern is mostly syntax sugar for pattern window function, is new logical operator still required?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After the integration with LogicalWindow, I see they are quite similar. Yeah, I think Pattern operator is probably not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't find the Window unresolvedPlan. So replace Pattern with Window instead.