Skip to content
Open
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"java.compile.nullAnalysis.mode": "disabled"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@


package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Token that represents a byte size literal such as {@code 10KB} or {@code 1.5MB}.
 *
 * <p>A literal is a non-negative decimal number immediately followed by one of the units
 * {@code KB}, {@code MB}, {@code GB}, {@code TB} or {@code PB} (matched case-insensitively).
 * Each unit is 1024 times the previous one, with {@code 1KB} equal to 1024 bytes. The parsed
 * size is stored as a whole number of bytes; a fractional byte is truncated.
 */
public class ByteSize implements Token {
  private static final Pattern PATTERN = Pattern.compile("(\\d+(?:\\.\\d+)?)([KkMmGgTtPp][Bb])");

  /** 2^63, the smallest double value that does not fit into a {@code long}. */
  private static final double LONG_LIMIT = 0x1.0p63;

  private final long bytes;
  private final String originalValue;

  /**
   * Parses a byte size literal.
   *
   * @param value the literal to parse, for example {@code "512KB"} or {@code "1.5gb"}
   * @throws IllegalArgumentException if {@code value} does not match the expected format or
   *     denotes more bytes than a {@code long} can hold
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public ByteSize(String value) {
    Matcher matcher = PATTERN.matcher(value);
    if (!matcher.matches()) {
      throw new IllegalArgumentException("Invalid byte size format: " + value);
    }

    double number = Double.parseDouble(matcher.group(1));
    // Locale.ROOT keeps the unit lookup independent of the JVM's default locale.
    String unit = matcher.group(2).toUpperCase(java.util.Locale.ROOT);

    // The multipliers are powers of two, so this product is the same double the
    // step-by-step multiplication would produce.
    double scaled = number * multiplierFor(unit);
    // A double-to-long cast saturates at Long.MAX_VALUE instead of failing, which would
    // silently turn an oversized literal into a wrong size. Reject it explicitly.
    if (scaled >= LONG_LIMIT) {
      throw new IllegalArgumentException("Byte size is too large: " + value);
    }

    this.originalValue = value;
    this.bytes = (long) scaled;
  }

  /** Returns the number of bytes in one of the given (upper-case) unit. */
  private static long multiplierFor(String unit) {
    switch (unit) {
      case "KB":
        return 1L << 10;
      case "MB":
        return 1L << 20;
      case "GB":
        return 1L << 30;
      case "TB":
        return 1L << 40;
      case "PB":
        return 1L << 50;
      default:
        throw new IllegalArgumentException("Unsupported byte size unit: " + unit);
    }
  }

  /**
   * Returns the size in megabytes, formatted with two decimals and an {@code MB} suffix.
   * The decimal separator is always a dot, regardless of the default locale.
   */
  @Override
  public Object value() {
    return String.format(java.util.Locale.ROOT, "%.2fMB", getMB());
  }

  /** Returns {@link TokenType#BYTE_SIZE}. */
  @Override
  public TokenType type() {
    return TokenType.BYTE_SIZE;
  }

  /**
   * Returns a JSON object with the token type name ({@code type}), the literal as it was
   * passed to the constructor ({@code value}) and the parsed size ({@code bytes}).
   */
  @Override
  public JsonElement toJson() {
    JsonObject object = new JsonObject();
    object.addProperty("type", type().name());
    object.addProperty("value", originalValue);
    object.addProperty("bytes", bytes);
    return object;
  }

  /** Returns the size in bytes. */
  public long getBytes() {
    return bytes;
  }

  /** Returns the size in kilobytes (1024 bytes). */
  public double getKB() {
    return bytes / 1024.0;
  }

  /** Returns the size in megabytes (1024 KB). */
  public double getMB() {
    return bytes / (1024.0 * 1024);
  }

  /** Returns the size in gigabytes (1024 MB). */
  public double getGB() {
    return bytes / (1024.0 * 1024 * 1024);
  }

  /** Returns the size in terabytes (1024 GB). */
  public double getTB() {
    return bytes / (1024.0 * 1024 * 1024 * 1024);
  }

  /** Returns the size in petabytes (1024 TB). */
  public double getPB() {
    return bytes / (1024.0 * 1024 * 1024 * 1024 * 1024);
  }
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@


package io.cdap.wrangler.api.parser;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Token that represents a time duration literal such as {@code 10ms} or {@code 1.5s}.
 *
 * <p>A literal is a non-negative decimal number immediately followed by one of the units
 * {@code ns}, {@code ms} or {@code s} (matched case-insensitively). The parsed duration is
 * stored as a whole number of nanoseconds; a fractional nanosecond is truncated.
 */
public class TimeDuration implements Token {
  private static final Pattern PATTERN =
    Pattern.compile("(\\d+(?:\\.\\d+)?)([Nn][Ss]|[Mm][Ss]|[Ss])");

  /** 2^63, the smallest double value that does not fit into a {@code long}. */
  private static final double LONG_LIMIT = 0x1.0p63;

  private final long nanoseconds;
  private final String originalValue;

  /**
   * Parses a time duration literal.
   *
   * @param value the literal to parse, for example {@code "250ms"} or {@code "1.5S"}
   * @throws IllegalArgumentException if {@code value} does not match the expected format or
   *     denotes more nanoseconds than a {@code long} can hold
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public TimeDuration(String value) {
    Matcher matcher = PATTERN.matcher(value);
    if (!matcher.matches()) {
      throw new IllegalArgumentException("Invalid time duration format: " + value);
    }

    double number = Double.parseDouble(matcher.group(1));
    // Locale.ROOT keeps the unit lookup independent of the JVM's default locale.
    String unit = matcher.group(2).toUpperCase(java.util.Locale.ROOT);

    double scaled = number * nanosPer(unit);
    // A double-to-long cast saturates at Long.MAX_VALUE instead of failing, which would
    // silently turn an oversized literal into a wrong duration. Reject it explicitly.
    if (scaled >= LONG_LIMIT) {
      throw new IllegalArgumentException("Time duration is too large: " + value);
    }

    this.originalValue = value;
    this.nanoseconds = (long) scaled;
  }

  /** Returns the number of nanoseconds in one of the given (upper-case) unit. */
  private static long nanosPer(String unit) {
    switch (unit) {
      case "NS":
        return 1L;
      case "MS":
        return 1_000_000L;
      case "S":
        return 1_000_000_000L;
      default:
        throw new IllegalArgumentException("Unsupported time duration unit: " + unit);
    }
  }

  /**
   * Returns the duration in seconds, formatted with two decimals and an {@code s} suffix.
   * The decimal separator is always a dot, regardless of the default locale.
   */
  @Override
  public Object value() {
    return String.format(java.util.Locale.ROOT, "%.2fs", getSeconds());
  }

  /** Returns {@link TokenType#TIME_DURATION}. */
  @Override
  public TokenType type() {
    return TokenType.TIME_DURATION;
  }

  /**
   * Returns a JSON object with the token type name ({@code type}), the literal as it was
   * passed to the constructor ({@code value}) and the parsed duration ({@code nanoseconds}).
   */
  @Override
  public JsonElement toJson() {
    JsonObject object = new JsonObject();
    object.addProperty("type", type().name());
    object.addProperty("value", originalValue);
    object.addProperty("nanoseconds", nanoseconds);
    return object;
  }

  /** Returns the duration in nanoseconds. */
  public long getNanoseconds() {
    return nanoseconds;
  }

  /** Returns the duration in milliseconds. */
  public double getMilliseconds() {
    return nanoseconds / 1_000_000.0;
  }

  /** Returns the duration in seconds. */
  public double getSeconds() {
    return nanoseconds / 1_000_000_000.0;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@


package io.cdap.wrangler.api.parser;

import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for {@link TimeDuration} parsing and unit conversion.
 */
public class TimeDurationTest {
  // Tolerance for comparing the double-valued conversions.
  private static final double DELTA = 0.001;

  /** One second is reported consistently in nanoseconds, milliseconds and seconds. */
  @Test
  public void testBasicParsing() {
    TimeDuration duration = new TimeDuration("1s");
    Assert.assertEquals(1000000000L, duration.getNanoseconds());
    Assert.assertEquals(1000.0, duration.getMilliseconds(), DELTA);
    Assert.assertEquals(1.0, duration.getSeconds(), DELTA);
  }

  /** Each supported unit is scaled to the right number of nanoseconds. */
  @Test
  public void testDifferentUnits() {
    Assert.assertEquals(1L, new TimeDuration("1ns").getNanoseconds());
    Assert.assertEquals(1000000L, new TimeDuration("1ms").getNanoseconds());
    Assert.assertEquals(1000000000L, new TimeDuration("1s").getNanoseconds());
  }

  /** Fractional numbers are accepted and scaled like whole numbers. */
  @Test
  public void testDecimalValues() {
    Assert.assertEquals(500000L, new TimeDuration("0.5ms").getNanoseconds());
    Assert.assertEquals(1500000L, new TimeDuration("1.5ms").getNanoseconds());
    Assert.assertEquals(500000000L, new TimeDuration("0.5s").getNanoseconds());
  }

  /** Units are matched regardless of letter case. */
  @Test
  public void testCaseInsensitive() {
    Assert.assertEquals(1L, new TimeDuration("1NS").getNanoseconds());
    Assert.assertEquals(1000000L, new TimeDuration("1MS").getNanoseconds());
    Assert.assertEquals(1000000000L, new TimeDuration("1S").getNanoseconds());
  }

  /** Text without a number and unit is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void testInvalidFormat() {
    new TimeDuration("invalid");
  }

  /** A unit outside ns, ms and s is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void testInvalidUnit() {
    new TimeDuration("1xs");
  }

  /** A leading minus sign is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void testNegativeValue() {
    new TimeDuration("-1s");
  }
}
138 changes: 17 additions & 121 deletions wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
Original file line number Diff line number Diff line change
@@ -1,25 +1,3 @@
/*
* Copyright © 2017-2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package io.cdap.wrangler.api.parser;

import io.cdap.wrangler.api.annotations.PublicEvolving;

import java.io.Serializable;

/**
* The TokenType class provides the enumerated types for different types of
* tokens that are supported by the grammar.
Expand All @@ -40,117 +18,35 @@
* @see Expression
* @see Text
* @see TextList
* @see ByteSize
* @see TimeDuration
*/
@PublicEvolving
public enum TokenType implements Serializable {
// ... existing code ...
/**
* Represents the enumerated type for the object {@code DirectiveName} type.
* This type is associated with the token that is recognized as a directive
* name within the recipe.
*/
DIRECTIVE_NAME,

/**
* Represents the enumerated type for the object of {@code ColumnName} type.
* This type is associated with token that represents the column as defined
* by the grammar as :<column-name>.
*/
COLUMN_NAME,

/**
* Represents the enumerated type for the object of {@code Text} type.
* This type is associated with the token that is either enclosed within a single quote(')
* or a double quote (") as string.
*/
TEXT,

/**
* Represents the enumerated type for the object of {@code Numeric} type.
* This type is associated with the token that is either a integer or real number.
*/
NUMERIC,

/**
* Represents the enumerated type for the object of {@code Bool} type.
* This type is associated with the token that either represents string 'true' or 'false'.
*/
BOOLEAN,

/**
* Represents the enumerated type for the object of type {@code BoolList} type.
* This type is associated with the rule that is a collection of {@code Boolean} values
* separated by comman(,). E.g.
* <code>
* ColumnName[,ColumnName]*
* </code>
*/
COLUMN_NAME_LIST,

/**
* Represents the enumerated type for the object of type {@code TextList} type.
* This type is associated with the comma separated text represented were each text
* is enclosed within a single quote (') or double quote (") and each text is separated
* by comma (,). E.g.
* <code>
* Text[,Text]*
* </code>
*/
TEXT_LIST,

/**
* Represents the enumerated type for the object of type {@code NumericList} type.
* This type is associated with the collection of {@code Numeric} values separated by
* comma(,). E.g.
* <code>
* Numeric[,Numeric]*
* </code>
*
*/
NUMERIC_LIST,

/**
* Represents the enumerated type for the object of type {@code BoolList} type.
* This type is associated with the collection of {@code Bool} values separated by
* comma(,). E.g.
* <code>
* Boolean[,Boolean]*
* </code>
*/
BOOLEAN_LIST,

/**
* Represents the enumerated type for the object of type {@code Expression} type.
* This type is associated with code block that either represents a condition or
* an expression. E.g.
* <code>
* exp:{ <expression || condition> }
* </code>
* Represents the enumerated type for the object of type {@code String} with restrictions
* on characters that can be present in a string.
*/
EXPRESSION,
IDENTIFIER,

/**
* Represents the enumerated type for the object of type {@code Properties} type.
* This type is associated with a collection of key and value pairs all separated
* by a comma(,). E.g.
* Represents the enumerated type for the object of {@code ByteSize} type.
* This type is associated with byte size values with units (KB, MB, GB, TB, PB).
* E.g.
* <code>
* prop:{ <key>=<value>[,<key>=<value>]*}
* 1KB, 2MB, 3GB, 4TB, 5PB
* </code>
*/
PROPERTIES,
BYTE_SIZE,

/**
* Represents the enumerated type for the object of type {@code Ranges} types.
* This type is associated with a collection of range represented in the form shown
* below
* Represents the enumerated type for the object of {@code TimeDuration} type.
* This type is associated with time duration values with units (ns, ms, s).
* E.g.
* <code>
* <start>:<end>=value[,<start>:<end>=value]*
* 1ns, 2ms, 3s
* </code>
*/
RANGES,

/**
* Represents the enumerated type for the object of type {@code String} with restrictions
* on characters that can be present in a string.
*/
IDENTIFIER
}
TIME_DURATION
}
Loading