data-integrations · Adnanmuhamed · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "java.compile.nullAnalysis.mode": "disabled"
+}
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java
@@ -0,0 +1,127 @@
+
+
+ package io.cdap.wrangler.api.parser;
+
+ import com.google.gson.JsonElement;
+ import com.google.gson.JsonObject;
+ import java.util.Locale;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+ /**
+  * Token for a byte size literal such as {@code 10KB} or {@code 1.5MB}.
+  *
+  * <p>A literal is a non-negative decimal number immediately followed by one of the units
+  * KB, MB, GB, TB or PB (case-insensitive). Units are powers of 1024 and the converted value
+  * is truncated to a whole number of bytes.
+  */
+ public class ByteSize implements Token {
+   private static final Pattern PATTERN = Pattern.compile("(\\d+(?:\\.\\d+)?)([KkMmGgTtPp][Bb])");
+   private static final double KILO = 1024.0;
+   private final long bytes;
+   private final String originalValue;
+
+   /**
+    * Parses a byte size literal.
+    *
+    * @param value literal to parse, e.g. {@code "512KB"}
+    * @throws IllegalArgumentException if {@code value} is null, is not a number followed by a
+    *     supported unit, or does not fit in a {@code long} number of bytes
+    */
+   public ByteSize(String value) {
+     if (value == null) {
+       throw new IllegalArgumentException("Invalid byte size format: null");
+     }
+     Matcher matcher = PATTERN.matcher(value);
+     if (!matcher.matches()) {
+       throw new IllegalArgumentException("Invalid byte size format: " + value);
+     }
+
+     double number = Double.parseDouble(matcher.group(1));
+     String unit = matcher.group(2).toUpperCase(Locale.ROOT);
+     double scaled = number * bytesPerUnit(unit);
+     // A double-to-long cast saturates silently, so reject anything at or above 2^63 up front.
+     if (scaled >= Long.MAX_VALUE) {
+       throw new IllegalArgumentException("Byte size is too large: " + value);
+     }
+
+     this.originalValue = value;
+     this.bytes = (long) scaled;
+   }
+
+   /** Returns the number of bytes in one {@code unit}; {@code unit} must be upper case. */
+   private static double bytesPerUnit(String unit) {
+     switch (unit) {
+       case "KB":
+         return KILO;
+       case "MB":
+         return KILO * KILO;
+       case "GB":
+         return KILO * KILO * KILO;
+       case "TB":
+         return KILO * KILO * KILO * KILO;
+       case "PB":
+         return KILO * KILO * KILO * KILO * KILO;
+       default:
+         throw new IllegalArgumentException("Unsupported byte size unit: " + unit);
+     }
+   }
+
+   /**
+    * Returns the size rendered in megabytes with two decimals, e.g. {@code "1.50MB"}.
+    * The text is formatted with {@link Locale#ROOT} so it does not vary with the JVM locale.
+    */
+   @Override
+   public Object value() {
+     return String.format(Locale.ROOT, "%.2f%s", getMB(), "MB");
+   }
+
+   /** Returns {@link TokenType#BYTE_SIZE}. */
+   @Override
+   public TokenType type() {
+     return TokenType.BYTE_SIZE;
+   }
+
+   /**
+    * Serializes this token as a JSON object with the token type name, the literal exactly as it
+    * was given to the constructor, and the size in bytes.
+    */
+   @Override
+   public JsonElement toJson() {
+     JsonObject object = new JsonObject();
+     object.addProperty("type", type().name());
+     object.addProperty("value", originalValue);
+     object.addProperty("bytes", bytes);
+     return object;
+   }
+
+   /** Returns the size in bytes. */
+   public long getBytes() {
+     return bytes;
+   }
+
+   /** Returns the size in kilobytes (1024 bytes). */
+   public double getKB() {
+     return bytes / KILO;
+   }
+
+   /** Returns the size in megabytes (1024 kilobytes). */
+   public double getMB() {
+     return bytes / (KILO * KILO);
+   }
+
+   /** Returns the size in gigabytes (1024 megabytes). */
+   public double getGB() {
+     return bytes / (KILO * KILO * KILO);
+   }
+
+   /** Returns the size in terabytes (1024 gigabytes). */
+   public double getTB() {
+     return bytes / (KILO * KILO * KILO * KILO);
+   }
+
+   /** Returns the size in petabytes (1024 terabytes). */
+   public double getPB() {
+     return bytes / (KILO * KILO * KILO * KILO * KILO);
+   }
+ }
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSizeTest.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSizeTest.java
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
@@ -0,0 +1,108 @@
+
+
+ package io.cdap.wrangler.api.parser;
+
+ import com.google.gson.JsonElement;
+ import com.google.gson.JsonObject;
+ import java.util.Locale;
+ import java.util.regex.Matcher;
+ import java.util.regex.Pattern;
+
+ /**
+  * Token for a time duration literal such as {@code 10ms} or {@code 1.5s}.
+  *
+  * <p>A literal is a non-negative decimal number immediately followed by one of the units
+  * ns, ms or s (case-insensitive). The converted value is truncated to whole nanoseconds.
+  */
+ public class TimeDuration implements Token {
+   private static final Pattern PATTERN = Pattern.compile("(\\d+(?:\\.\\d+)?)([Nn][Ss]|[Mm][Ss]|[Ss])");
+   private static final double NANOS_PER_MILLI = 1_000_000.0;
+   private static final double NANOS_PER_SECOND = 1_000_000_000.0;
+   private final long nanoseconds;
+   private final String originalValue;
+
+   /**
+    * Parses a time duration literal.
+    *
+    * @param value literal to parse, e.g. {@code "250ms"}
+    * @throws IllegalArgumentException if {@code value} is null, is not a number followed by a
+    *     supported unit, or does not fit in a {@code long} number of nanoseconds
+    */
+   public TimeDuration(String value) {
+     if (value == null) {
+       throw new IllegalArgumentException("Invalid time duration format: null");
+     }
+     Matcher matcher = PATTERN.matcher(value);
+     if (!matcher.matches()) {
+       throw new IllegalArgumentException("Invalid time duration format: " + value);
+     }
+
+     double number = Double.parseDouble(matcher.group(1));
+     String unit = matcher.group(2).toUpperCase(Locale.ROOT);
+     double scaled = number * nanosPerUnit(unit);
+     // A double-to-long cast saturates silently, so reject anything at or above 2^63 up front.
+     if (scaled >= Long.MAX_VALUE) {
+       throw new IllegalArgumentException("Time duration is too large: " + value);
+     }
+
+     this.originalValue = value;
+     this.nanoseconds = (long) scaled;
+   }
+
+   /** Returns the number of nanoseconds in one {@code unit}; {@code unit} must be upper case. */
+   private static double nanosPerUnit(String unit) {
+     switch (unit) {
+       case "NS":
+         return 1.0;
+       case "MS":
+         return NANOS_PER_MILLI;
+       case "S":
+         return NANOS_PER_SECOND;
+       default:
+         throw new IllegalArgumentException("Unsupported time duration unit: " + unit);
+     }
+   }
+
+   /**
+    * Returns the duration rendered in seconds with two decimals, e.g. {@code "1.50s"}.
+    * The text is formatted with {@link Locale#ROOT} so it does not vary with the JVM locale.
+    */
+   @Override
+   public Object value() {
+     return String.format(Locale.ROOT, "%.2f%s", getSeconds(), "s");
+   }
+
+   /** Returns {@link TokenType#TIME_DURATION}. */
+   @Override
+   public TokenType type() {
+     return TokenType.TIME_DURATION;
+   }
+
+   /**
+    * Serializes this token as a JSON object with the token type name, the literal exactly as it
+    * was given to the constructor, and the duration in nanoseconds.
+    */
+   @Override
+   public JsonElement toJson() {
+     JsonObject object = new JsonObject();
+     object.addProperty("type", type().name());
+     object.addProperty("value", originalValue);
+     object.addProperty("nanoseconds", nanoseconds);
+     return object;
+   }
+
+   /** Returns the duration in nanoseconds. */
+   public long getNanoseconds() {
+     return nanoseconds;
+   }
+
+   /** Returns the duration in milliseconds. */
+   public double getMilliseconds() {
+     return nanoseconds / NANOS_PER_MILLI;
+   }
+
+   /** Returns the duration in seconds. */
+   public double getSeconds() {
+     return nanoseconds / NANOS_PER_SECOND;
+   }
+ }
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDurationTest.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDurationTest.java
@@ -0,0 +1,56 @@
+
+
+ package io.cdap.wrangler.api.parser;
+
+ import org.junit.Assert;
+ import org.junit.Test;
+
+ /**
+  * Tests for {@link TimeDuration} parsing and unit conversion.
+  */
+ public class TimeDurationTest {
+   @Test
+   public void testBasicParsing() throws Exception {
+     TimeDuration duration = new TimeDuration("1s");
+     Assert.assertEquals(1000000000L, duration.getNanoseconds());
+     Assert.assertEquals(1000.0, duration.getMilliseconds(), 0.001);
+     Assert.assertEquals(1.0, duration.getSeconds(), 0.001);
+   }
+
+   @Test
+   public void testDifferentUnits() throws Exception {
+     Assert.assertEquals(1L, new TimeDuration("1ns").getNanoseconds());
+     Assert.assertEquals(1000000L, new TimeDuration("1ms").getNanoseconds());
+     Assert.assertEquals(1000000000L, new TimeDuration("1s").getNanoseconds());
+   }
+
+   @Test
+   public void testDecimalValues() throws Exception {
+     Assert.assertEquals(500000L, new TimeDuration("0.5ms").getNanoseconds());
+     Assert.assertEquals(1500000L, new TimeDuration("1.5ms").getNanoseconds());
+     Assert.assertEquals(500000000L, new TimeDuration("0.5s").getNanoseconds());
+   }
+
+   @Test
+   public void testCaseInsensitive() throws Exception {
+     Assert.assertEquals(1L, new TimeDuration("1NS").getNanoseconds());
+     Assert.assertEquals(1000000L, new TimeDuration("1MS").getNanoseconds());
+     Assert.assertEquals(1000000000L, new TimeDuration("1S").getNanoseconds());
+   }
+
+   @Test(expected = IllegalArgumentException.class)
+   public void testInvalidFormat() throws Exception {
+     new TimeDuration("invalid");
+   }
+
+   @Test(expected = IllegalArgumentException.class)
+   public void testInvalidUnit() throws Exception {
+     new TimeDuration("1xs");
+   }
+
+   @Test(expected = IllegalArgumentException.class)
+   public void testNegativeValue() throws Exception {
+     // The literal pattern has no sign, so a leading '-' must be rejected.
+     new TimeDuration("-1s");
+   }
+ }
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java
@@ -40,6 +40,8 @@
  * @see Expression
  * @see Text
  * @see TextList
+ * @see ByteSize
+ * @see TimeDuration
  */
 @PublicEvolving
 public enum TokenType implements Serializable {
@@ -152,5 +154,25 @@
    * Represents the enumerated type for the object of type {@code String} with restrictions
    * on characters that can be present in a string.
    */
-  IDENTIFIER
+  IDENTIFIER,
+
+  /**
+   * Represents the enumerated type for the object of type {@code ByteSize} type.
+   * This type is associated with byte size values with units (KB, MB, GB, TB, PB).
+   * E.g.
+   * <code>
+   *   1KB, 2MB, 3GB, 4TB, 5PB
+   * </code>
+   */
+  BYTE_SIZE,
+
+  /**
+   * Represents the enumerated type for the object of type {@code TimeDuration} type.
+   * This type is associated with time duration values with units (ns, ms, s).
+   * E.g.
+   * <code>
+   *   1ns, 2ms, 3s
+   * </code>
+   */
+  TIME_DURATION
 }