
Commit 819788f

[MINOR] Remove repetitive words in docs (#10844)
Signed-off-by: studystill <[email protected]>
1 parent 3698d49 commit 819788f

File tree

6 files changed: +6 -6 lines changed

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystPlansUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -144,7 +144,7 @@ trait HoodieCatalystPlansUtils {
   def createMITJoin(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, condition: Option[Expression], hint: String): LogicalPlan
 
   /**
-   * true if both plans produce the same attributes in the the same order
+   * true if both plans produce the same attributes in the same order
    */
   def produceSameOutput(a: LogicalPlan, b: LogicalPlan): Boolean
 }
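
For context, the contract documented in this hunk can be sketched in a few lines of Catalyst-facing Scala. This is an illustrative sketch under one assumption (not Hudi's actual implementation): that "same attributes in the same order" means the two plans' output attribute lists match pairwise by expression id.

    // Illustrative sketch, not Hudi code: compares the two plans' output
    // attribute lists pairwise, in order, by expression id.
    import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    def produceSameOutputSketch(a: LogicalPlan, b: LogicalPlan): Boolean = {
      val (outA, outB) = (a.output, b.output)
      outA.length == outB.length &&
        outA.zip(outB).forall { case (x, y) => x.exprId == y.exprId }
    }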

hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java

Lines changed: 1 addition & 1 deletion
@@ -199,7 +199,7 @@ public String toString() {
   }
 
   /**
-   * @return size of the the bloomfilter
+   * @return size of the bloomfilter
    */
   public int getVectorSize() {
     return this.vectorSize;
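
As an aside, the vector size m returned by this getter is the parameter in the textbook bloom filter false-positive estimate. A minimal sketch using the standard formula (not Hudi code; k and n are the caller's hash-function count and inserted-key count):

    // Standard estimate: p ≈ (1 - e^(-k*n/m))^k, where m is the bit-vector
    // size (as returned by getVectorSize()), k the number of hash functions,
    // and n the number of keys inserted so far.
    def falsePositiveRate(m: Int, k: Int, n: Long): Double =
      math.pow(1 - math.exp(-k * n.toDouble / m), k)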

hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/bulk/sort/SortOperator.java

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ public void open() throws Exception {
 
     collector = new StreamRecordCollector<>(output);
 
-    // register the the metrics.
+    // register the metrics.
     getMetricGroup().gauge("memoryUsedSizeInBytes", (Gauge<Long>) sorter::getUsedMemoryInBytes);
     getMetricGroup().gauge("numSpillFiles", (Gauge<Long>) sorter::getNumSpillFiles);
     getMetricGroup().gauge("spillInBytes", (Gauge<Long>) sorter::getSpillInBytes);

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ import scala.util.{Failure, Success, Try}
  * who's directory level is 3).We can still read it as a partitioned table. We will mapping the
  * partition path (e.g. 2021/03/10) to the only partition column (e.g. "dt").
  *
- * 3、Else the the partition columns size is not equal to the partition directory level and the
+ * 3、Else the partition columns size is not equal to the partition directory level and the
  * size is great than "1" (e.g. partition column is "dt,hh", the partition path is "2021/03/10/12")
  * , we read it as a Non-Partitioned table because we cannot know how to mapping the partition
  * path with the partition columns in this case.
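
The rule this hunk touches is easiest to see with concrete values. A hypothetical helper illustrating the comment's cases (name and shape invented for this sketch, not HoodieFileIndex code):

    // Hypothetical sketch, not HoodieFileIndex code. Mapping succeeds when
    // the column count equals the directory depth, or when a single column
    // absorbs the whole path (e.g. "2021/03/10" -> dt). Otherwise the split
    // is ambiguous (e.g. Seq("dt", "hh") vs "2021/03/10/12") and the table
    // is read as non-partitioned.
    def canMapPartitionColumns(partitionColumns: Seq[String], partitionPath: String): Boolean = {
      val depth = partitionPath.split("/").length
      partitionColumns.length == depth || partitionColumns.length == 1
    }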

rfc/rfc-76/rfc-76.md

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ Let's consider following scenario: while persisting the dataset, writing one of
 To provide for aforementioned requirement of the records obtaining globally unique synthetic keys either of the 2 following properties have to hold true:
 Key generation has to be deterministic and reproducible (so that upon Spark retries we could be certain same records will be obtaining the identity value they did during previous pass)
 Records have to be getting globally unique identity value every time (such that key collisions are simply impossible)
-Note that, deterministic and reproducible identity value association is is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guarantee even for "insert" operation in the presence of failures).
+Note that, deterministic and reproducible identity value association is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guarantee even for "insert" operation in the presence of failures).
 For achieving our goal of providing globally unique keys we're planning on relying on the following synthetic key format comprised of 2 components
 (Reserved) Commit timestamp: Use reserved commit timestamp as prefix (to provide for global uniqueness of rows)
 Row id: unique identifier of the row (record) w/in the provided batch
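
Putting the two components together: a hedged sketch of what such a synthetic key could look like. The format and names here are illustrative, not the RFC's final encoding; partition id plus row offset within the partition is one row id that stays stable across Spark retries for "determinate" RDDs.

    // Illustrative sketch only; the concrete encoding is up to the RFC.
    // commitTimestamp: reserved commit timestamp, the global-uniqueness prefix.
    // partitionId / rowOffset: per-batch row id, deterministic for
    // "determinate" RDDs because partition contents and ordering are
    // reproducible on retry.
    def syntheticKey(commitTimestamp: String, partitionId: Int, rowOffset: Long): String =
      s"${commitTimestamp}_${partitionId}-${rowOffset}"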

scripts/pr_compliance.py

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ def test_title():
 # #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
-#Enums for the the outcome of parsing a single line
+#Enums for the outcome of parsing a single line
 class Outcomes:
     #error was found so we should stop parsing and exit with error
     ERROR = 0
