From e1e3e4569b7681b1bdf6049dd7d23174fe0af6ef Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Fri, 1 Feb 2019 17:49:49 +0900 Subject: [PATCH 001/235] initial commit --- .../nemo/client/ClientEndpointTest.java | 4 +- .../executionproperty/DataStoreProperty.java | 3 +- .../annotating/CrailEdgeDataStorePass.java | 52 ++++++ .../optimizer/policy/CrailPolicy.java | 57 ++++++ .../policy/DisaggregationPolicy.java | 2 +- .../java/org/apache/nemo/conf/JobConf.java | 7 + .../nemo/examples/beam/WordCountITCase.java | 34 ++++ runtime/executor/pom.xml | 5 + .../executor/data/BlockManagerWorker.java | 9 +- .../executor/data/block/FileBlock.java | 36 ++-- .../executor/data/stores/CrailFileStore.java | 165 ++++++++++++++++++ .../executor/data/stores/RemoteFileStore.java | 4 +- .../datatransfer/BlockOutputWriter.java | 2 +- .../runtime/executor/data/BlockStoreTest.java | 4 +- .../nemo/runtime/master/PlanStateManager.java | 10 +- .../master/scheduler/BatchScheduler.java | 18 +- .../master/scheduler/StreamingScheduler.java | 2 +- .../master/scheduler/TaskDispatcher.java | 2 +- .../runtime/master/PlanStateManagerTest.java | 8 +- 19 files changed, 391 insertions(+), 33 deletions(-) create mode 100644 compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java create mode 100644 compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java create mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java diff --git a/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java b/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java index 1a12968673..2c54ef3542 100644 --- a/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java +++ b/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java @@ -77,8 +77,8 @@ public void testState() throws Exception { final List tasks = physicalPlan.getStageDAG().getTopologicalSort().stream() .flatMap(stage -> planStateManager.getTaskAttemptsToSchedule(stage.getId()).stream()) .collect(Collectors.toList()); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING)); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING, 0)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE, 0)); assertEquals(PlanState.State.COMPLETE, clientEndpoint.waitUntilJobFinish()); } diff --git a/common/src/main/java/org/apache/nemo/common/ir/edge/executionproperty/DataStoreProperty.java b/common/src/main/java/org/apache/nemo/common/ir/edge/executionproperty/DataStoreProperty.java index e53157b18e..9de8968976 100644 --- a/common/src/main/java/org/apache/nemo/common/ir/edge/executionproperty/DataStoreProperty.java +++ b/common/src/main/java/org/apache/nemo/common/ir/edge/executionproperty/DataStoreProperty.java @@ -49,6 +49,7 @@ public enum Value { MemoryStore, SerializedMemoryStore, LocalFileStore, - GlusterFileStore + GlusterFileStore, + CrailFileStore } } diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java new file mode 100644 index 0000000000..a57de8abfb --- /dev/null +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.nemo.compiler.optimizer.pass.compiletime.annotating; + +import org.apache.nemo.common.ir.edge.IREdge; +import org.apache.nemo.common.ir.edge.executionproperty.DataStoreProperty; +import org.apache.nemo.common.ir.vertex.IRVertex; +import org.apache.nemo.common.dag.DAG; +import org.apache.nemo.compiler.optimizer.pass.compiletime.Requires; + +import java.util.List; + +/** + * A pass to support Disaggregated Resources by tagging edges. + * This pass handles the DataStore ExecutionProperty. + */ +@Annotates(DataStoreProperty.class) +@Requires(DataStoreProperty.class) +public final class CrailEdgeDataStorePass extends AnnotatingPass { + /** + * Default constructor. + */ + public CrailEdgeDataStorePass() { + super(CrailEdgeDataStorePass.class); + } + + @Override + public DAG apply(final DAG dag) { + dag.getVertices().forEach(vertex -> { // Initialize the DataStore of the DAG with GlusterFileStore. + final List inEdges = dag.getIncomingEdgesOf(vertex); + inEdges.forEach(edge -> + edge.setPropertyPermanently(DataStoreProperty.of(DataStoreProperty.Value.CrailFileStore))); + }); + return dag; + } +} diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java new file mode 100644 index 0000000000..8336de7d5b --- /dev/null +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.nemo.compiler.optimizer.policy; + +import org.apache.nemo.common.dag.DAG; +import org.apache.nemo.common.eventhandler.PubSubEventHandlerWrapper; +import org.apache.nemo.common.ir.edge.IREdge; +import org.apache.nemo.common.ir.vertex.IRVertex; +import org.apache.nemo.compiler.optimizer.pass.compiletime.annotating.*; +import org.apache.nemo.compiler.optimizer.pass.compiletime.composite.DefaultCompositePass; +import org.apache.nemo.compiler.optimizer.pass.compiletime.composite.LoopOptimizationCompositePass; +import org.apache.reef.tang.Injector; + +/** + * A policy to demonstrate the disaggregation optimization, that uses GlusterFS as file storage. + */ +public final class CrailPolicy implements Policy { + public static final PolicyBuilder BUILDER = + new PolicyBuilder() + .registerCompileTimePass(new CrailEdgeDataStorePass()) //***확인 + .registerCompileTimePass(new LoopOptimizationCompositePass()) + .registerCompileTimePass(new DefaultCompositePass()); + private final Policy policy; + + /** + * Default constructor. + */ + public CrailPolicy() { + this.policy = BUILDER.build(); + } + + @Override + public DAG runCompileTimeOptimization(final DAG dag, final String dagDirectory) { + return this.policy.runCompileTimeOptimization(dag, dagDirectory); + } + + @Override + public void registerRunTimeOptimizations(final Injector injector, final PubSubEventHandlerWrapper pubSubWrapper) { + this.policy.registerRunTimeOptimizations(injector, pubSubWrapper); + } +} diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java index c7cc8667f1..12e620e25b 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java @@ -33,7 +33,7 @@ public final class DisaggregationPolicy implements Policy { public static final PolicyBuilder BUILDER = new PolicyBuilder() - .registerCompileTimePass(new DisaggregationEdgeDataStorePass()) + .registerCompileTimePass(new DisaggregationEdgeDataStorePass()) //***확인 .registerCompileTimePass(new LoopOptimizationCompositePass()) .registerCompileTimePass(new DefaultCompositePass()); private final Policy policy; diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 1bdb7c9e0a..16609a48d3 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -76,6 +76,13 @@ public final class FileDirectory implements Name { public final class GlusterVolumeDirectory implements Name { } + /** + * Directory points the CrailFileSystem to store files. + */ + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "../tmp_crail") + public final class CrailVolumeDirectory implements Name { + } + //////////////////////////////// Client-Driver RPC /** diff --git a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java index e31a8c5a4d..97bec8156b 100644 --- a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java +++ b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java @@ -23,6 +23,9 @@ import org.apache.nemo.common.test.ExampleTestArgs; import org.apache.nemo.common.test.ExampleTestUtil; import org.apache.nemo.compiler.optimizer.policy.ConditionalLargeShufflePolicy; +import org.apache.nemo.compiler.optimizer.policy.CrailPolicy; +import org.apache.nemo.compiler.optimizer.policy.DefaultPolicy; +import org.apache.nemo.compiler.optimizer.policy.DisaggregationPolicy; import org.apache.nemo.examples.beam.policy.*; import org.junit.After; import org.junit.Before; @@ -72,6 +75,7 @@ public void test() throws Exception { .build()); } + @Test (timeout = ExampleTestArgs.TIMEOUT) public void testLargeShuffle() throws Exception { JobLauncher.main(builder @@ -127,4 +131,34 @@ public void testSpeculativeExecution() throws Exception { .addOptimizationPolicy(AggressiveSpeculativeCloningPolicyParallelismFive.class.getCanonicalName()) .build()); } + + @Test (timeout = ExampleTestArgs.TIMEOUT) + public void testDisaggregationPolicy() throws Exception{ + JobLauncher.main(builder + .addResourceJson(executorResourceFileName) + .addJobId(WordCountITCase.class.getSimpleName() + " DisaggregationPolicy") + .addMaxTaskAttempt(Integer.MAX_VALUE) + .addOptimizationPolicy(DisaggregationPolicy.class.getCanonicalName()) + .build()); + } + + @Test (timeout = ExampleTestArgs.TIMEOUT) + public void testDefaultPolicy() throws Exception{ + JobLauncher.main(builder + .addResourceJson(executorResourceFileName) + .addJobId(WordCountITCase.class.getSimpleName() + " DefaultPolicy") + .addMaxTaskAttempt(Integer.MAX_VALUE) + .addOptimizationPolicy(DefaultPolicy.class.getCanonicalName()) + .build()); + } + + @Test (timeout = ExampleTestArgs.TIMEOUT) + public void testCrailPolicy() throws Exception{ + JobLauncher.main(builder + .addResourceJson(executorResourceFileName) + .addJobId(WordCountITCase.class.getSimpleName() + " CrailPolicy") + .addMaxTaskAttempt(Integer.MAX_VALUE) + .addOptimizationPolicy(CrailPolicy.class.getCanonicalName()) + .build()); + } } diff --git a/runtime/executor/pom.xml b/runtime/executor/pom.xml index ae7a770b10..7ebf2ae27e 100644 --- a/runtime/executor/pom.xml +++ b/runtime/executor/pom.xml @@ -64,6 +64,11 @@ under the License. 0.1-SNAPSHOT test + + org.apache.crail + crail-client + 1.2-incubating-SNAPSHOT + 2.13.0 2.0.0-beta.5 - 3.0.0-M1 + 2.19.1 4.12 From 081e9b55bdc117f5196b59e571b61650c283d31e Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Thu, 14 Feb 2019 02:19:34 +0000 Subject: [PATCH 009/235] ClassNotFoundException solved --- beam_test_executor_resources.json | 12 ++++++++++++ bin/run_beam.sh | 3 ++- pom.xml | 11 +++++------ 3 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 beam_test_executor_resources.json diff --git a/beam_test_executor_resources.json b/beam_test_executor_resources.json new file mode 100644 index 0000000000..91f7aee668 --- /dev/null +++ b/beam_test_executor_resources.json @@ -0,0 +1,12 @@ +[ + { + "type": "Transient", + "memory_mb": 512, + "capacity": 4 + }, + { + "type": "Reserved", + "memory_mb": 512, + "capacity": 4 + } +] diff --git a/bin/run_beam.sh b/bin/run_beam.sh index cbd082c7a1..c2dcf6611a 100755 --- a/bin/run_beam.sh +++ b/bin/run_beam.sh @@ -19,4 +19,5 @@ java -Dlog4j.configuration=file://`pwd`/log4j.properties -cp examples/beam/target/nemo-examples-beam-$(mvn -q \ -Dexec.executable=echo -Dexec.args='${project.version}' \ - --non-recursive exec:exec)-shaded.jar:`yarn classpath` org.apache.nemo.client.JobLauncher "$@" + --non-recursive exec:exec)-shaded.jar:$CRAIL_JAR:`yarn classpath` org.apache.nemo.client.JobLauncher "$@" + diff --git a/pom.xml b/pom.xml index 10eec08b39..b52f3c1529 100644 --- a/pom.xml +++ b/pom.xml @@ -114,8 +114,12 @@ under the License. ${powermock.version} test + + org.apache.crail + crail-assembly + 1.2-incubating-SNAPSHOT + - @@ -150,11 +154,6 @@ under the License. 1.6.2 test - - org.apache.crail - crail-client - 1.2-incubating-SNAPSHOT - From c4e7de7a7018a28fb04c81ed83e0e1d342459d77 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 12:25:58 +0900 Subject: [PATCH 010/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 88450d8972..381013758d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,6 +96,7 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + LOG.info("HY:crail file created"); } catch (Exception e) { e.printStackTrace(); } From 19b88c8a4775cbc4e73c3ee5346aa59dcc6452f5 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 12:52:40 +0900 Subject: [PATCH 011/235] GlusterFS partially substituted for CrailFileStore --- .../apache/nemo/runtime/executor/data/BlockManagerWorker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index d2ce64d8f6..2ae4fc8eb0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -264,7 +264,7 @@ public void writeBlock(final Block block, .setBlockId(blockId) .setState(ControlMessage.BlockStateFromExecutor.AVAILABLE); - if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore)) { + if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore) || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { blockStateChangedMsgBuilder.setLocation(REMOTE_FILE_STORE); } else { blockStateChangedMsgBuilder.setLocation(executorId); @@ -520,7 +520,7 @@ private static DataStoreProperty.Value convertBlockStore( case LOCAL_FILE: return DataStoreProperty.Value.LocalFileStore; case REMOTE_FILE: - return DataStoreProperty.Value.GlusterFileStore; + return DataStoreProperty.Value.CrailFileStore; default: throw new UnsupportedBlockStoreException(new Exception("This block store is not yet supported")); } From 1ed05ffd9ccd3d9af3484e2c14f7b66977d6f3c4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 14:51:50 +0900 Subject: [PATCH 012/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 381013758d..805585468b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,8 +96,9 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - LOG.info("HY:crail file created"); + LOG.info("HY: crail file block created"); } catch (Exception e) { + LOG.info("HY: crail file block creation failed"); e.printStackTrace(); } } From 1104561d2ad12c1d79f1d4a6394451557a6cb24a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:06:07 +0900 Subject: [PATCH 013/235] Create Parent Node --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 16609a48d3..69995a216b 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "../tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") public final class CrailVolumeDirectory implements Name { } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 1c515835a7..f9180a217d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -64,10 +64,10 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri final SerializerManager serializerManager) throws Exception { super(serializerManager); this.fileDirectory =volumeDirectory + "/" + jobId; - new File(fileDirectory).mkdirs(); + //new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - //CrailFile file = fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + CrailFile file = fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.PARENT, CrailLocationClass.PARENT, true).get().asFile(); } @Override @@ -77,6 +77,7 @@ public Block createBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final RemoteFileMetadata metadata = RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 86374856fd2ec507351db5a84b7ed12ce3835e34 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:13:02 +0900 Subject: [PATCH 014/235] Revert "logging" This reverts commit 1ed05ffd9ccd3d9af3484e2c14f7b66977d6f3c4. --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 805585468b..381013758d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,9 +96,8 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - LOG.info("HY: crail file block created"); + LOG.info("HY:crail file created"); } catch (Exception e) { - LOG.info("HY: crail file block creation failed"); e.printStackTrace(); } } From a8136858f947b67feb29813dcf408e6c5b49d738 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:13:50 +0900 Subject: [PATCH 015/235] Revert "Revert "logging"" This reverts commit 86374856fd2ec507351db5a84b7ed12ce3835e34. --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 381013758d..805585468b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,8 +96,9 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - LOG.info("HY:crail file created"); + LOG.info("HY: crail file block created"); } catch (Exception e) { + LOG.info("HY: crail file block creation failed"); e.printStackTrace(); } } From 047da1d585261cf7878eb9fac555e5854712fa9b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:14:10 +0900 Subject: [PATCH 016/235] Revert "logging" This reverts commit 1ed05ffd9ccd3d9af3484e2c14f7b66977d6f3c4. --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 805585468b..381013758d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,9 +96,8 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - LOG.info("HY: crail file block created"); + LOG.info("HY:crail file created"); } catch (Exception e) { - LOG.info("HY: crail file block creation failed"); e.printStackTrace(); } } From a8ba1b9f05088af83bd16dee3c818d3b414a817e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:18:02 +0900 Subject: [PATCH 017/235] Revert "Create Parent Node" This reverts commit 1104561d2ad12c1d79f1d4a6394451557a6cb24a. --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 69995a216b..16609a48d3 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "../tmp_crail") public final class CrailVolumeDirectory implements Name { } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index f9180a217d..1c515835a7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -64,10 +64,10 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri final SerializerManager serializerManager) throws Exception { super(serializerManager); this.fileDirectory =volumeDirectory + "/" + jobId; - //new File(fileDirectory).mkdirs(); + new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - CrailFile file = fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.PARENT, CrailLocationClass.PARENT, true).get().asFile(); + //CrailFile file = fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); } @Override @@ -77,7 +77,6 @@ public Block createBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final RemoteFileMetadata metadata = RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); - return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 9c27f876425f3b182541eb90508ca8482b67db69 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:36:12 +0900 Subject: [PATCH 018/235] CrailStore.create debugging --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 1c515835a7..8d566e162b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -67,7 +67,8 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - //CrailFile file = fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + //Parent Node (/tmp_crail/jobId/) creation needed + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.PARENT, CrailLocationClass.DEFAULT, false); } @Override From 94262a29836a005fb8b5f5f9e711e1180cdf8436 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:42:36 +0900 Subject: [PATCH 019/235] dir naming edit --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 16609a48d3..69995a216b 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "../tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") public final class CrailVolumeDirectory implements Name { } From afb8778fcd45f62ff9b993b0d83239648c32add0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 15:50:32 +0900 Subject: [PATCH 020/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 381013758d..805585468b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,8 +96,9 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - LOG.info("HY:crail file created"); + LOG.info("HY: crail file block created"); } catch (Exception e) { + LOG.info("HY: crail file block creation failed"); e.printStackTrace(); } } From ac7f127f3df63610fb89516d772474f53d9e05ea Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 16:10:48 +0900 Subject: [PATCH 021/235] CrailStore debug --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 69995a216b..1ff516a45e 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "./tmp_crail") public final class CrailVolumeDirectory implements Name { } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 8d566e162b..7ef1934f32 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -68,7 +68,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); //Parent Node (/tmp_crail/jobId/) creation needed - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.PARENT, CrailLocationClass.DEFAULT, false); + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); } @Override From 9fa9c08479113d7d211639c6d0f51eebe6f1e1c9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 16:16:30 +0900 Subject: [PATCH 022/235] logging --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 7ef1934f32..f4ab363bac 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -29,6 +29,8 @@ import org.apache.nemo.runtime.executor.data.metadata.RemoteFileMetadata; import org.apache.nemo.runtime.executor.data.block.FileBlock; import org.apache.reef.tang.annotations.Parameter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; @@ -46,6 +48,7 @@ */ @ThreadSafe public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { + private static final Logger LOG = LoggerFactory.getLogger(CrailFileStore.class.getName()); private final String fileDirectory; private CrailConfiguration conf = null; private CrailStore fs = null; @@ -67,6 +70,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); + LOG.info("HY: CrailStore created. Not yet for the file directory itself"); //Parent Node (/tmp_crail/jobId/) creation needed fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); } From 6dd759f707b791badf6f9c53cc7498278a72a443 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 16:39:09 +0900 Subject: [PATCH 023/235] debug --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pom.xml b/pom.xml index b52f3c1529..50624b42be 100644 --- a/pom.xml +++ b/pom.xml @@ -119,6 +119,11 @@ under the License. crail-assembly 1.2-incubating-SNAPSHOT + + org.apache.crail + crail-client + 1.2-incubating-SNAPSHOT + From 6361330417db41215967f3ab20fca429c55cb5c9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 16:49:29 +0900 Subject: [PATCH 024/235] debug --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index f4ab363bac..8bf80bf9cd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -70,7 +70,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - LOG.info("HY: CrailStore created. Not yet for the file directory itself"); + //LOG.info("HY: CrailStore created. Not yet for the file directory itself"); //Parent Node (/tmp_crail/jobId/) creation needed fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); } From 5551cbf2df8c6c211238c907b18d3949ea7d4acc Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Thu, 14 Feb 2019 07:51:17 +0000 Subject: [PATCH 025/235] dependency c --- client/dependency-reduced-pom.xml | 74 ++++++++++++ common/dependency-reduced-pom.xml | 102 ++++++++++++++++ compiler/backend/dependency-reduced-pom.xml | 92 +++++++++++++++ .../frontend/beam/dependency-reduced-pom.xml | 109 ++++++++++++++++++ compiler/optimizer/dependency-reduced-pom.xml | 86 ++++++++++++++ conf/dependency-reduced-pom.xml | 74 ++++++++++++ pom.xml | 2 +- runtime/common/dependency-reduced-pom.xml | 80 +++++++++++++ runtime/driver/dependency-reduced-pom.xml | 74 ++++++++++++ runtime/executor/dependency-reduced-pom.xml | 98 ++++++++++++++++ runtime/master/dependency-reduced-pom.xml | 86 ++++++++++++++ runtime/test/dependency-reduced-pom.xml | 74 ++++++++++++ 12 files changed, 950 insertions(+), 1 deletion(-) create mode 100644 client/dependency-reduced-pom.xml create mode 100644 common/dependency-reduced-pom.xml create mode 100644 compiler/backend/dependency-reduced-pom.xml create mode 100644 compiler/frontend/beam/dependency-reduced-pom.xml create mode 100644 compiler/optimizer/dependency-reduced-pom.xml create mode 100644 conf/dependency-reduced-pom.xml create mode 100644 runtime/common/dependency-reduced-pom.xml create mode 100644 runtime/driver/dependency-reduced-pom.xml create mode 100644 runtime/executor/dependency-reduced-pom.xml create mode 100644 runtime/master/dependency-reduced-pom.xml create mode 100644 runtime/test/dependency-reduced-pom.xml diff --git a/client/dependency-reduced-pom.xml b/client/dependency-reduced-pom.xml new file mode 100644 index 0000000000..ab50517631 --- /dev/null +++ b/client/dependency-reduced-pom.xml @@ -0,0 +1,74 @@ + + + + nemo-project + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-client + Nemo Client + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/common/dependency-reduced-pom.xml b/common/dependency-reduced-pom.xml new file mode 100644 index 0000000000..cc1ca5e360 --- /dev/null +++ b/common/dependency-reduced-pom.xml @@ -0,0 +1,102 @@ + + + + nemo-project + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-common + Nemo Common + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.7.2 + provided + + + hadoop-yarn-common + org.apache.hadoop + + + slf4j-log4j12 + org.slf4j + + + hadoop-annotations + org.apache.hadoop + + + guice-servlet + com.google.inject.extensions + + + netty + io.netty + + + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/compiler/backend/dependency-reduced-pom.xml b/compiler/backend/dependency-reduced-pom.xml new file mode 100644 index 0000000000..d2e7932adc --- /dev/null +++ b/compiler/backend/dependency-reduced-pom.xml @@ -0,0 +1,92 @@ + + + + nemo-compiler + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-compiler-backend + Nemo Compiler Backend + + + org.apache.nemo + nemo-compiler-optimizer + 0.1-SNAPSHOT + test + + + org.slf4j + slf4j-simple + 1.6.2 + test + + + slf4j-api + org.slf4j + + + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/compiler/frontend/beam/dependency-reduced-pom.xml b/compiler/frontend/beam/dependency-reduced-pom.xml new file mode 100644 index 0000000000..b1be2a8ac1 --- /dev/null +++ b/compiler/frontend/beam/dependency-reduced-pom.xml @@ -0,0 +1,109 @@ + + + + nemo-compiler + org.apache.nemo + 0.1-SNAPSHOT + ../../pom.xml + + 4.0.0 + nemo-compiler-frontend-beam + Nemo Compiler Frontend: Beam + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.7.2 + provided + + + hadoop-yarn-common + org.apache.hadoop + + + slf4j-log4j12 + org.slf4j + + + hadoop-annotations + org.apache.hadoop + + + guice-servlet + com.google.inject.extensions + + + netty + io.netty + + + + + org.slf4j + slf4j-simple + 1.6.2 + test + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/compiler/optimizer/dependency-reduced-pom.xml b/compiler/optimizer/dependency-reduced-pom.xml new file mode 100644 index 0000000000..c21e9936ee --- /dev/null +++ b/compiler/optimizer/dependency-reduced-pom.xml @@ -0,0 +1,86 @@ + + + + nemo-compiler + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-compiler-optimizer + Nemo Compiler Optimizer + + + org.slf4j + slf4j-simple + 1.6.2 + test + + + slf4j-api + org.slf4j + + + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/conf/dependency-reduced-pom.xml b/conf/dependency-reduced-pom.xml new file mode 100644 index 0000000000..ad1ee39205 --- /dev/null +++ b/conf/dependency-reduced-pom.xml @@ -0,0 +1,74 @@ + + + + nemo-project + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-conf + Nemo Job Configuration + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/pom.xml b/pom.xml index b52f3c1529..426cff51bf 100644 --- a/pom.xml +++ b/pom.xml @@ -280,7 +280,7 @@ under the License. - + org.apache.rat apache-rat-plugin 0.12 diff --git a/runtime/common/dependency-reduced-pom.xml b/runtime/common/dependency-reduced-pom.xml new file mode 100644 index 0000000000..431759ad0c --- /dev/null +++ b/runtime/common/dependency-reduced-pom.xml @@ -0,0 +1,80 @@ + + + + nemo-runtime + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-runtime-common + Nemo Runtime Common + + + io.grpc + grpc-testing + 1.7.0 + test + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/runtime/driver/dependency-reduced-pom.xml b/runtime/driver/dependency-reduced-pom.xml new file mode 100644 index 0000000000..977d01a209 --- /dev/null +++ b/runtime/driver/dependency-reduced-pom.xml @@ -0,0 +1,74 @@ + + + + nemo-runtime + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-driver + Nemo Driver + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/runtime/executor/dependency-reduced-pom.xml b/runtime/executor/dependency-reduced-pom.xml new file mode 100644 index 0000000000..f89cff0717 --- /dev/null +++ b/runtime/executor/dependency-reduced-pom.xml @@ -0,0 +1,98 @@ + + + + nemo-runtime + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-runtime-executor + Nemo Runtime Executor + + + commons-io + commons-io + 2.5 + test + + + org.apache.nemo + nemo-runtime-master + 0.1-SNAPSHOT + test + + + org.slf4j + slf4j-simple + 1.6.2 + test + + + slf4j-api + org.slf4j + + + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/runtime/master/dependency-reduced-pom.xml b/runtime/master/dependency-reduced-pom.xml new file mode 100644 index 0000000000..325b801969 --- /dev/null +++ b/runtime/master/dependency-reduced-pom.xml @@ -0,0 +1,86 @@ + + + + nemo-runtime + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-runtime-master + Nemo Runtime Master + + + org.slf4j + slf4j-simple + 1.6.2 + test + + + slf4j-api + org.slf4j + + + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + diff --git a/runtime/test/dependency-reduced-pom.xml b/runtime/test/dependency-reduced-pom.xml new file mode 100644 index 0000000000..914674a039 --- /dev/null +++ b/runtime/test/dependency-reduced-pom.xml @@ -0,0 +1,74 @@ + + + + nemo-runtime + org.apache.nemo + 0.1-SNAPSHOT + + 4.0.0 + nemo-runtime-test + Nemo Runtime Test + + + junit + junit + 4.12 + test + + + hamcrest-core + org.hamcrest + + + + + org.mockito + mockito-core + 2.13.0 + test + + + byte-buddy + net.bytebuddy + + + byte-buddy-agent + net.bytebuddy + + + objenesis + org.objenesis + + + + + org.powermock + powermock-module-junit4 + 2.0.0-beta.5 + test + + + powermock-module-junit4-common + org.powermock + + + hamcrest-core + org.hamcrest + + + + + org.powermock + powermock-api-mockito2 + 2.0.0-beta.5 + test + + + powermock-api-support + org.powermock + + + + + + From 2bfcface71c38c8eb8e397dc99d70a1e83a938e4 Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Thu, 14 Feb 2019 07:52:06 +0000 Subject: [PATCH 026/235] Revert "dependency c" This reverts commit 5551cbf2df8c6c211238c907b18d3949ea7d4acc. --- client/dependency-reduced-pom.xml | 74 ------------ common/dependency-reduced-pom.xml | 102 ---------------- compiler/backend/dependency-reduced-pom.xml | 92 --------------- .../frontend/beam/dependency-reduced-pom.xml | 109 ------------------ compiler/optimizer/dependency-reduced-pom.xml | 86 -------------- conf/dependency-reduced-pom.xml | 74 ------------ pom.xml | 2 +- runtime/common/dependency-reduced-pom.xml | 80 ------------- runtime/driver/dependency-reduced-pom.xml | 74 ------------ runtime/executor/dependency-reduced-pom.xml | 98 ---------------- runtime/master/dependency-reduced-pom.xml | 86 -------------- runtime/test/dependency-reduced-pom.xml | 74 ------------ 12 files changed, 1 insertion(+), 950 deletions(-) delete mode 100644 client/dependency-reduced-pom.xml delete mode 100644 common/dependency-reduced-pom.xml delete mode 100644 compiler/backend/dependency-reduced-pom.xml delete mode 100644 compiler/frontend/beam/dependency-reduced-pom.xml delete mode 100644 compiler/optimizer/dependency-reduced-pom.xml delete mode 100644 conf/dependency-reduced-pom.xml delete mode 100644 runtime/common/dependency-reduced-pom.xml delete mode 100644 runtime/driver/dependency-reduced-pom.xml delete mode 100644 runtime/executor/dependency-reduced-pom.xml delete mode 100644 runtime/master/dependency-reduced-pom.xml delete mode 100644 runtime/test/dependency-reduced-pom.xml diff --git a/client/dependency-reduced-pom.xml b/client/dependency-reduced-pom.xml deleted file mode 100644 index ab50517631..0000000000 --- a/client/dependency-reduced-pom.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - nemo-project - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-client - Nemo Client - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/common/dependency-reduced-pom.xml b/common/dependency-reduced-pom.xml deleted file mode 100644 index cc1ca5e360..0000000000 --- a/common/dependency-reduced-pom.xml +++ /dev/null @@ -1,102 +0,0 @@ - - - - nemo-project - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-common - Nemo Common - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.7.2 - provided - - - hadoop-yarn-common - org.apache.hadoop - - - slf4j-log4j12 - org.slf4j - - - hadoop-annotations - org.apache.hadoop - - - guice-servlet - com.google.inject.extensions - - - netty - io.netty - - - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/compiler/backend/dependency-reduced-pom.xml b/compiler/backend/dependency-reduced-pom.xml deleted file mode 100644 index d2e7932adc..0000000000 --- a/compiler/backend/dependency-reduced-pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - nemo-compiler - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-compiler-backend - Nemo Compiler Backend - - - org.apache.nemo - nemo-compiler-optimizer - 0.1-SNAPSHOT - test - - - org.slf4j - slf4j-simple - 1.6.2 - test - - - slf4j-api - org.slf4j - - - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/compiler/frontend/beam/dependency-reduced-pom.xml b/compiler/frontend/beam/dependency-reduced-pom.xml deleted file mode 100644 index b1be2a8ac1..0000000000 --- a/compiler/frontend/beam/dependency-reduced-pom.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - nemo-compiler - org.apache.nemo - 0.1-SNAPSHOT - ../../pom.xml - - 4.0.0 - nemo-compiler-frontend-beam - Nemo Compiler Frontend: Beam - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.7.2 - provided - - - hadoop-yarn-common - org.apache.hadoop - - - slf4j-log4j12 - org.slf4j - - - hadoop-annotations - org.apache.hadoop - - - guice-servlet - com.google.inject.extensions - - - netty - io.netty - - - - - org.slf4j - slf4j-simple - 1.6.2 - test - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/compiler/optimizer/dependency-reduced-pom.xml b/compiler/optimizer/dependency-reduced-pom.xml deleted file mode 100644 index c21e9936ee..0000000000 --- a/compiler/optimizer/dependency-reduced-pom.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - nemo-compiler - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-compiler-optimizer - Nemo Compiler Optimizer - - - org.slf4j - slf4j-simple - 1.6.2 - test - - - slf4j-api - org.slf4j - - - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/conf/dependency-reduced-pom.xml b/conf/dependency-reduced-pom.xml deleted file mode 100644 index ad1ee39205..0000000000 --- a/conf/dependency-reduced-pom.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - nemo-project - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-conf - Nemo Job Configuration - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/pom.xml b/pom.xml index 426cff51bf..b52f3c1529 100644 --- a/pom.xml +++ b/pom.xml @@ -280,7 +280,7 @@ under the License. - + org.apache.rat apache-rat-plugin 0.12 diff --git a/runtime/common/dependency-reduced-pom.xml b/runtime/common/dependency-reduced-pom.xml deleted file mode 100644 index 431759ad0c..0000000000 --- a/runtime/common/dependency-reduced-pom.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - nemo-runtime - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-runtime-common - Nemo Runtime Common - - - io.grpc - grpc-testing - 1.7.0 - test - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/runtime/driver/dependency-reduced-pom.xml b/runtime/driver/dependency-reduced-pom.xml deleted file mode 100644 index 977d01a209..0000000000 --- a/runtime/driver/dependency-reduced-pom.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - nemo-runtime - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-driver - Nemo Driver - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/runtime/executor/dependency-reduced-pom.xml b/runtime/executor/dependency-reduced-pom.xml deleted file mode 100644 index f89cff0717..0000000000 --- a/runtime/executor/dependency-reduced-pom.xml +++ /dev/null @@ -1,98 +0,0 @@ - - - - nemo-runtime - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-runtime-executor - Nemo Runtime Executor - - - commons-io - commons-io - 2.5 - test - - - org.apache.nemo - nemo-runtime-master - 0.1-SNAPSHOT - test - - - org.slf4j - slf4j-simple - 1.6.2 - test - - - slf4j-api - org.slf4j - - - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/runtime/master/dependency-reduced-pom.xml b/runtime/master/dependency-reduced-pom.xml deleted file mode 100644 index 325b801969..0000000000 --- a/runtime/master/dependency-reduced-pom.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - nemo-runtime - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-runtime-master - Nemo Runtime Master - - - org.slf4j - slf4j-simple - 1.6.2 - test - - - slf4j-api - org.slf4j - - - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - diff --git a/runtime/test/dependency-reduced-pom.xml b/runtime/test/dependency-reduced-pom.xml deleted file mode 100644 index 914674a039..0000000000 --- a/runtime/test/dependency-reduced-pom.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - nemo-runtime - org.apache.nemo - 0.1-SNAPSHOT - - 4.0.0 - nemo-runtime-test - Nemo Runtime Test - - - junit - junit - 4.12 - test - - - hamcrest-core - org.hamcrest - - - - - org.mockito - mockito-core - 2.13.0 - test - - - byte-buddy - net.bytebuddy - - - byte-buddy-agent - net.bytebuddy - - - objenesis - org.objenesis - - - - - org.powermock - powermock-module-junit4 - 2.0.0-beta.5 - test - - - powermock-module-junit4-common - org.powermock - - - hamcrest-core - org.hamcrest - - - - - org.powermock - powermock-api-mockito2 - 2.0.0-beta.5 - test - - - powermock-api-support - org.powermock - - - - - - From e7aeb919476bef59391a75e393cbd59280e13010 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Feb 2019 19:28:04 +0900 Subject: [PATCH 027/235] revert --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 1 + .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 805585468b..c4267059bc 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -89,6 +89,7 @@ public FileBlock(final String blockId, this.serializer = serializer; this.filePath = filePath; this.metadata = metadata; + LOG.info("HY: block id : {}", blockId); if(filePath.contains("crail")) { //conf = new CrailConfiguration(); //fs = CrailStore.newInstance(conf); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 8bf80bf9cd..e9d9ac89fb 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -68,11 +68,12 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.fileDirectory =volumeDirectory + "/" + jobId; new File(fileDirectory).mkdirs(); + this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); //LOG.info("HY: CrailStore created. Not yet for the file directory itself"); //Parent Node (/tmp_crail/jobId/) creation needed - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); + //fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); } @Override From 1c4b1bb996716ede0c484ec33a51df94a7fc42c8 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 13:04:19 +0900 Subject: [PATCH 028/235] Nemo Driver for Crail --- .../java/org/apache/nemo/conf/JobConf.java | 2 +- .../org/apache/nemo/driver/NemoDriver.java | 34 +++++++++++++++++++ .../executor/data/stores/CrailFileStore.java | 1 - 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 1ff516a45e..69995a216b 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "./tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") public final class CrailVolumeDirectory implements Name { } diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index b8d5885866..d48b15612d 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -18,6 +18,8 @@ */ package org.apache.nemo.driver; +import org.apache.crail.*; +import org.apache.crail.conf.CrailConfiguration; import org.apache.nemo.common.ir.IdManager; import org.apache.nemo.compiler.optimizer.pass.compiletime.annotating.ResourceSitePass; import org.apache.nemo.conf.JobConf; @@ -54,6 +56,7 @@ import org.slf4j.LoggerFactory; import javax.inject.Inject; +import java.io.IOException; import java.io.Serializable; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -86,6 +89,10 @@ public final class NemoDriver { // Client for sending log messages private final RemoteClientMessageLoggingHandler handler; + //Crail + CrailConfiguration conf; + CrailStore fs; + @Inject private NemoDriver(final UserApplicationRunner userApplicationRunner, final RuntimeMaster runtimeMaster, @@ -147,6 +154,33 @@ public final class StartHandler implements EventHandler { @Override public void onNext(final StartTime startTime) { setUpLogger(); + boolean baseDirExists; + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + + LOG.info("creating main dir /tmp_crail"); + /* + try { + if(fs.lookup("/tmp_crail").get() != null){ + baseDirExists = true; + } + else{ + baseDirExists = false; + } + } catch (Exception e) { + LOG.info("fs.lookup failed"); + e.printStackTrace(); + } + */ + fs.delete("/tmp_crail", true).get().syncDir(); + fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + LOG.info("creating main dir done"); + } + catch(Exception e){ + LOG.info("HY: Error occurred during driver crail main dir setup"); + e.printStackTrace(); + } runtimeMaster.requestContainer(resourceSpecificationString); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index e9d9ac89fb..bab143be10 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -68,7 +68,6 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.fileDirectory =volumeDirectory + "/" + jobId; new File(fileDirectory).mkdirs(); - this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); //LOG.info("HY: CrailStore created. Not yet for the file directory itself"); From 54a6577af4c57df0354d7303dd5ce5eb6b755574 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 13:24:31 +0900 Subject: [PATCH 029/235] error handling --- .../src/main/java/org/apache/nemo/driver/NemoDriver.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index d48b15612d..33fd77334e 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -173,7 +173,12 @@ public void onNext(final StartTime startTime) { e.printStackTrace(); } */ + try{ fs.delete("/tmp_crail", true).get().syncDir(); + } + catch(Exception e){ + LOG.info("HY: failed to delete /tmp_crail"); + } fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); LOG.info("creating main dir done"); } From 427db919bacf73bab59c9a6ad4f0b499f7fa4d24 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 14:37:47 +0900 Subject: [PATCH 030/235] file creation --- .../apache/nemo/runtime/executor/data/block/FileBlock.java | 1 + .../nemo/runtime/executor/data/stores/CrailFileStore.java | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index c4267059bc..3df641cd0c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -97,6 +97,7 @@ public FileBlock(final String blockId, try { this.fs = fs; this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); LOG.info("HY: crail file block created"); } catch (Exception e) { LOG.info("HY: crail file block creation failed"); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index bab143be10..9d44ed0c56 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -66,13 +66,13 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - this.fileDirectory =volumeDirectory + "/" + jobId; - new File(fileDirectory).mkdirs(); + this.fileDirectory = volumeDirectory + "/" + jobId; + //new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); //LOG.info("HY: CrailStore created. Not yet for the file directory itself"); //Parent Node (/tmp_crail/jobId/) creation needed - //fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, false); + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); } @Override From 9fc8f8c724cef4f5f0bf255d829bd38d4e264635 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 14:49:32 +0900 Subject: [PATCH 031/235] file creation edit --- .../java/org/apache/nemo/driver/NemoDriver.java | 2 +- .../executor/data/stores/CrailFileStore.java | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 33fd77334e..2ad08bf0fb 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -177,7 +177,7 @@ public void onNext(final StartTime startTime) { fs.delete("/tmp_crail", true).get().syncDir(); } catch(Exception e){ - LOG.info("HY: failed to delete /tmp_crail"); + LOG.info("failed to delete /tmp_crail"); } fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); LOG.info("creating main dir done"); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 9d44ed0c56..e49a528710 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -70,9 +70,19 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri //new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - //LOG.info("HY: CrailStore created. Not yet for the file directory itself"); - //Parent Node (/tmp_crail/jobId/) creation needed - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + boolean baseDirExists=false; + try { + if(fs.lookup(fileDirectory).get() != null){ + baseDirExists = true; + } + } catch (Exception e) { + LOG.info("fs.lookup failed"); + e.printStackTrace(); + } + + if(!baseDirExists){ + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + } } @Override From 1e6e40ad3b4f463d71c3a94f0642591c0ca4faf7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 15:23:00 +0900 Subject: [PATCH 032/235] parent node created --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 3df641cd0c..e96cdde562 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -96,6 +96,7 @@ public FileBlock(final String blockId, //file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); try { this.fs = fs; + fs.create(filePath,CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); LOG.info("HY: crail file block created"); From 3a40a3ca3d1637c06d63623dc31deefb950672b0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 17:56:55 +0900 Subject: [PATCH 033/235] directory structure changed --- .../runtime/executor/data/block/FileBlock.java | 4 ---- .../executor/data/stores/CrailFileStore.java | 18 +++--------------- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index e96cdde562..41095355af 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -91,12 +91,8 @@ public FileBlock(final String blockId, this.metadata = metadata; LOG.info("HY: block id : {}", blockId); if(filePath.contains("crail")) { - //conf = new CrailConfiguration(); - //fs = CrailStore.newInstance(conf); - //file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); try { this.fs = fs; - fs.create(filePath,CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); LOG.info("HY: crail file block created"); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index e49a528710..9bc7f35cdc 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -66,23 +66,11 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - this.fileDirectory = volumeDirectory + "/" + jobId; - //new File(fileDirectory).mkdirs(); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - boolean baseDirExists=false; - try { - if(fs.lookup(fileDirectory).get() != null){ - baseDirExists = true; - } - } catch (Exception e) { - LOG.info("fs.lookup failed"); - e.printStackTrace(); - } - - if(!baseDirExists){ - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - } + int host = fs.getLocationClass().value(); + this.fileDirectory = volumeDirectory + "/" + host + "/files"; + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); } @Override From 16aa04e5af5c05413dc7e08b0312b2b9bca7339e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 18:08:13 +0900 Subject: [PATCH 034/235] error catch --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 9bc7f35cdc..578c574c52 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -70,7 +70,13 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri this.fs = CrailStore.newInstance(conf); int host = fs.getLocationClass().value(); this.fileDirectory = volumeDirectory + "/" + host + "/files"; - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + try { + fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + } + catch(Exception e){ + LOG.info("HY: files directory creation failed"); + e.printStackTrace(); + } } @Override From d32ae05cd52a2b0491c4d5b8e75b4b42892f6cd0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 18:15:13 +0900 Subject: [PATCH 035/235] parent node created --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 578c574c52..91e7d07c0f 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -50,6 +50,7 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { private static final Logger LOG = LoggerFactory.getLogger(CrailFileStore.class.getName()); private final String fileDirectory; + private final String hostDirectory; private CrailConfiguration conf = null; private CrailStore fs = null; //CrailFile file = null; @@ -69,6 +70,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); int host = fs.getLocationClass().value(); + this.hostDirectory = volumeDirectory + "/" + host; this.fileDirectory = volumeDirectory + "/" + host + "/files"; try { fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); From 0e5879347eebebb19a5bc07f34082057d188166b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 18:25:48 +0900 Subject: [PATCH 036/235] edit --- .../apache/nemo/runtime/executor/data/stores/CrailFileStore.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 91e7d07c0f..3f107764e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -73,6 +73,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri this.hostDirectory = volumeDirectory + "/" + host; this.fileDirectory = volumeDirectory + "/" + host + "/files"; try { + fs.create(hostDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); } catch(Exception e){ From bc7edfc9e0f04b52982b9742521beb32b6ddc9de Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 18:30:48 +0900 Subject: [PATCH 037/235] edit --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 41095355af..29e769f8d5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -93,7 +93,7 @@ public FileBlock(final String blockId, if(filePath.contains("crail")) { try { this.fs = fs; - this.file = fs.create(filePath+'/'+id, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); LOG.info("HY: crail file block created"); } catch (Exception e) { From 1127cf798b96027d2af8500f5590abfdeb1f9f44 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 21:19:39 +0900 Subject: [PATCH 038/235] metadata creation --- .../runtime/executor/data/stores/CrailFileStore.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 3f107764e5..63f8367dbf 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -87,8 +87,16 @@ public Block createBlock(final String blockId) { deleteBlock(blockId); final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); + final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); + CrailFile file = null; + try { + file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } catch (Exception e) { + e.printStackTrace(); + } final RemoteFileMetadata metadata = - RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 8c7b26a5bf4460ff649fbf850716848351f76757 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 21:26:06 +0900 Subject: [PATCH 039/235] edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 63f8367dbf..d37bbd8bf4 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -88,6 +88,7 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); + /* CrailFile file = null; try { file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); @@ -95,6 +96,7 @@ public Block createBlock(final String blockId) { } catch (Exception e) { e.printStackTrace(); } + */ final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); From bb88241a30a07d9320f9d102e155af7ad975b8b0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 21:34:33 +0900 Subject: [PATCH 040/235] Revert "edit" This reverts commit 8c7b26a5bf4460ff649fbf850716848351f76757. --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index d37bbd8bf4..63f8367dbf 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -88,7 +88,6 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - /* CrailFile file = null; try { file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); @@ -96,7 +95,6 @@ public Block createBlock(final String blockId) { } catch (Exception e) { e.printStackTrace(); } - */ final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); From a33c60d65ba950c1c1c3ceb16bf52b36fb491aa2 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 21:34:45 +0900 Subject: [PATCH 041/235] Revert "metadata creation" This reverts commit 1127cf798b96027d2af8500f5590abfdeb1f9f44. --- .../runtime/executor/data/stores/CrailFileStore.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 63f8367dbf..3f107764e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -87,16 +87,8 @@ public Block createBlock(final String blockId) { deleteBlock(blockId); final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); - final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - CrailFile file = null; - try { - file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - } catch (Exception e) { - e.printStackTrace(); - } final RemoteFileMetadata metadata = - RemoteFileMetadata.create(metaPath); + RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 5698effe5cea2e9956ce6c2dd35a3c7a85b36fef Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 22:16:37 +0900 Subject: [PATCH 042/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 29e769f8d5..7e961466b3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -89,9 +89,9 @@ public FileBlock(final String blockId, this.serializer = serializer; this.filePath = filePath; this.metadata = metadata; - LOG.info("HY: block id : {}", blockId); if(filePath.contains("crail")) { try { + LOG.info("HY: FileBlock entered"); this.fs = fs; this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); From 85f4d4968da1c840a835ef552b4c16a90034bc0d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 22:21:13 +0900 Subject: [PATCH 043/235] logging --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 3f107764e5..b9acfa059e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -84,11 +84,13 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Override public Block createBlock(final String blockId) { + LOG.info("HY: Create Block started"); deleteBlock(blockId); final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final RemoteFileMetadata metadata = RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + LOG.info("HY: Create Block ended"); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 6fe0b5f3bd96f19fe6be6b39aa103ac5201ae197 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 22:40:26 +0900 Subject: [PATCH 044/235] Revert "logging" This reverts commit 85f4d4968da1c840a835ef552b4c16a90034bc0d. --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index b9acfa059e..3f107764e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -84,13 +84,11 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Override public Block createBlock(final String blockId) { - LOG.info("HY: Create Block started"); deleteBlock(blockId); final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final RemoteFileMetadata metadata = RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); - LOG.info("HY: Create Block ended"); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 73572e91a5bc8d3060e286045987dc61515d6fb5 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 22:41:59 +0900 Subject: [PATCH 045/235] Revert "Revert "metadata creation"" This reverts commit a33c60d65ba950c1c1c3ceb16bf52b36fb491aa2. --- .../runtime/executor/data/stores/CrailFileStore.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 3f107764e5..63f8367dbf 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -87,8 +87,16 @@ public Block createBlock(final String blockId) { deleteBlock(blockId); final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); + final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); + CrailFile file = null; + try { + file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } catch (Exception e) { + e.printStackTrace(); + } final RemoteFileMetadata metadata = - RemoteFileMetadata.create(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 4612dcbf8bee9f602f494d4de41ae979dccf8582 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 22:53:45 +0900 Subject: [PATCH 046/235] logging --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 63f8367dbf..2da607542b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -88,13 +88,6 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - CrailFile file = null; - try { - file = fs.create(metaPath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - } catch (Exception e) { - e.printStackTrace(); - } final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); From a1d45bc4eef611ab00c0f67081451d7fdd659152 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 23:20:03 +0900 Subject: [PATCH 047/235] remoteMetaData -> LocalMetadata --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 2da607542b..8ad7a04289 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -25,6 +25,7 @@ import org.apache.nemo.common.exception.BlockWriteException; import org.apache.nemo.runtime.executor.data.*; import org.apache.nemo.runtime.executor.data.block.Block; +import org.apache.nemo.runtime.executor.data.metadata.LocalFileMetadata; import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.apache.nemo.runtime.executor.data.metadata.RemoteFileMetadata; import org.apache.nemo.runtime.executor.data.block.FileBlock; @@ -88,8 +89,7 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - final RemoteFileMetadata metadata = - RemoteFileMetadata.create(metaPath); + final LocalFileMetadata metadata = new LocalFileMetadata(); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 3628b4601116a65c03a33094a563f9ec5f346ac3 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 23:27:31 +0900 Subject: [PATCH 048/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 7e961466b3..b88b7543fd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -114,6 +114,7 @@ public FileBlock(final String blockId, private void writeToFile(final Iterable> serializedPartitions) throws Exception { if (filePath.contains("crail")) { + LOG.info("HY: FileBlock writeToFile started"); //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write final CrailOutputStream fileOutputStream = file.getDirectOutputStream(1024); CrailBuffer buffer = fs.allocateBuffer(); @@ -121,6 +122,7 @@ private void writeToFile(final Iterable> serializedPartit buffer.put(serializedPartition.getData()); } fileOutputStream.write(buffer); + LOG.info("HY: FileBlock writeToFile started"); } else { try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { From 813bc9b5c6277286c5b5cbd6d72ca4df585dd1c3 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 16 Feb 2019 23:33:31 +0900 Subject: [PATCH 049/235] revert --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 8ad7a04289..97868ccb79 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -89,7 +89,7 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - final LocalFileMetadata metadata = new LocalFileMetadata(); + final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 80896a35c0c928cf968ce5d6177ba37440e9f687 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 00:00:18 +0900 Subject: [PATCH 050/235] buffer usage edit --- .../nemo/runtime/executor/data/block/FileBlock.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index b88b7543fd..5cc6165945 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -37,6 +37,7 @@ import javax.annotation.concurrent.NotThreadSafe; import java.io.*; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; @@ -116,13 +117,14 @@ private void writeToFile(final Iterable> serializedPartit if (filePath.contains("crail")) { LOG.info("HY: FileBlock writeToFile started"); //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write - final CrailOutputStream fileOutputStream = file.getDirectOutputStream(1024); - CrailBuffer buffer = fs.allocateBuffer(); + final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); + for(final SerializedPartition serializedPartition : serializedPartitions){ - buffer.put(serializedPartition.getData()); + //buffer.put(serializedPartition.getData()); + fileOutputStream.write(serializedPartition.getData()); } - fileOutputStream.write(buffer); - LOG.info("HY: FileBlock writeToFile started"); + //fileOutputStream.write(buffer); + LOG.info("HY: FileBlock writeToFile ended"); } else { try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { From 3559d3e58f946ea7154f2bae9463325cf6b268a5 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 10:06:23 +0900 Subject: [PATCH 051/235] file structure edit --- .../java/org/apache/nemo/driver/NemoDriver.java | 1 + .../runtime/executor/data/block/FileBlock.java | 3 +-- .../executor/data/stores/CrailFileStore.java | 14 ++------------ 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 2ad08bf0fb..dba67c5f55 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -180,6 +180,7 @@ public void onNext(final StartTime startTime) { LOG.info("failed to delete /tmp_crail"); } fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + fs.create("/tmp_crail/files", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); LOG.info("creating main dir done"); } catch(Exception e){ diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 5cc6165945..f89c42f1e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -120,10 +120,9 @@ private void writeToFile(final Iterable> serializedPartit final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ - //buffer.put(serializedPartition.getData()); fileOutputStream.write(serializedPartition.getData()); } - //fileOutputStream.write(buffer); + fileOutputStream.close(); LOG.info("HY: FileBlock writeToFile ended"); } else { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 97868ccb79..3b9ec1d74c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -51,7 +51,6 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { private static final Logger LOG = LoggerFactory.getLogger(CrailFileStore.class.getName()); private final String fileDirectory; - private final String hostDirectory; private CrailConfiguration conf = null; private CrailStore fs = null; //CrailFile file = null; @@ -70,17 +69,8 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - int host = fs.getLocationClass().value(); - this.hostDirectory = volumeDirectory + "/" + host; - this.fileDirectory = volumeDirectory + "/" + host + "/files"; - try { - fs.create(hostDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - fs.create(fileDirectory, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - } - catch(Exception e){ - LOG.info("HY: files directory creation failed"); - e.printStackTrace(); - } + //int host = fs.getLocationClass().value(); + this.fileDirectory = volumeDirectory + "/files"; } @Override From 9d812e0580f0b858775ac72f22453675b0df8ace Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 10:26:50 +0900 Subject: [PATCH 052/235] use LocalFileMetaData for CrailFileStore --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 3b9ec1d74c..fedfcb980f 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -79,7 +79,8 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); + //final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); + final LocalFileMetadata metadata = new LocalFileMetadata(); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From f7be76d09f6906119e8463de65fc63df592e5cbc Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 11:05:30 +0900 Subject: [PATCH 053/235] CrailFileMetaData --- .../data/metadata/CrailFileMetadata.java | 172 ++++++++++++++++++ .../executor/data/stores/CrailFileStore.java | 4 +- 2 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java new file mode 100644 index 0000000000..faa75473bd --- /dev/null +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.nemo.runtime.executor.data.metadata; + +import org.apache.commons.lang3.SerializationUtils; +import org.apache.crail.*; +import org.apache.crail.conf.CrailConfiguration; +import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.ThreadSafe; +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +/** + * This class represents a metadata for a remote file block. + * Because the data is stored in a remote file and globally accessed by multiple nodes, + * each read, or deletion for a block needs one instance of this metadata. + * The metadata is store in and read from a file (after a remote file block is committed). + * @param the key type of its partitions. + */ +@ThreadSafe +public final class CrailFileMetadata extends FileMetadata { + private static final Logger LOG = LoggerFactory.getLogger(CrailFileMetadata.class.getName()); + private final String metaFilePath; + private static CrailConfiguration conf; + private static CrailStore fs; + private static CrailFile file; + /** + * Constructor for creating a non-committed new file metadata. + * + * @param metaFilePath the metadata file path. + */ + private CrailFileMetadata(final String metaFilePath) { + super(); + this.metaFilePath = metaFilePath; + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } + catch(Exception e){ + LOG.info("HY: CrailConfiguration failed"); + e.printStackTrace(); + } + } + + /** + * Constructor for opening a existing file metadata. + * + * @param metaFilePath the metadata file path. + * @param partitionMetadataList the partition metadata list. + */ + private CrailFileMetadata(final String metaFilePath, + final List> partitionMetadataList) { + super(partitionMetadataList); + this.metaFilePath = metaFilePath; + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } + catch(Exception e){ + LOG.info("HY: CrailConfiguration failed"); + e.printStackTrace(); + } + } + + /** + * @see FileMetadata#deleteMetadata() + */ + @Override + public void deleteMetadata() throws IOException { + try { + fs.delete(metaFilePath, true).get().syncDir(); + } catch (Exception e) { + LOG.info("HY: metadata deletion failed"); + e.printStackTrace(); + } + } + + /** + * Write the collected {@link PartitionMetadata}s to the metadata file. + * Notifies that all writes are finished for the block corresponding to this metadata. + */ + @Override + public synchronized void commitBlock() throws IOException { + final Iterable> partitionMetadataItr = getPartitionMetadataList(); + try{ + CrailBufferedOutputStream metaFileOutputstream =file.getBufferedOutputStream(0); + for (PartitionMetadata partitionMetadata : partitionMetadataItr) { + final byte[] key = SerializationUtils.serialize(partitionMetadata.getKey()); + metaFileOutputstream.writeInt(key.length); + metaFileOutputstream.write(key); + metaFileOutputstream.writeInt(partitionMetadata.getPartitionSize()); + metaFileOutputstream.writeLong(partitionMetadata.getOffset()); + } + metaFileOutputstream.close(); + } + catch(Exception e){ + LOG.info("HY: CrailBufferedOutputStream exception occurred"); + } + setCommitted(true); + } + + /** + * Creates a new block metadata. + * + * @param metaFilePath the path of the file to write metadata. + * @param the key type of the block's partitions. + * @return the created block metadata. + */ + public static CrailFileMetadata create(final String metaFilePath) { + return new CrailFileMetadata<>(metaFilePath); + } + + /** + * Opens a existing block metadata in file. + * + * @param metaFilePath the path of the file to write metadata. + * @param the key type of the block's partitions. + * @return the created block metadata. + * @throws IOException if fail to open. + */ + public static CrailFileMetadata open(final String metaFilePath) throws Exception{ + if (fs.lookup(metaFilePath)==null){ + throw new IOException("File " + metaFilePath + " does not exist!"); + } + final List> partitionMetadataList = new ArrayList<>(); + try ( + CrailBufferedInputStream metaFileInputstream =file.getBufferedInputStream(0); + ) { + while (metaFileInputstream.available() > 0) { + final int keyLength = metaFileInputstream.readInt(); + final byte[] desKey = new byte[keyLength]; + if (keyLength != metaFileInputstream.read(desKey)) { + throw new IOException("Invalid key length!"); + } + + final PartitionMetadata partitionMetadata = new PartitionMetadata<>( + SerializationUtils.deserialize(desKey), + metaFileInputstream.readInt(), + metaFileInputstream.readLong() + ); + partitionMetadataList.add(partitionMetadata); + } + } + return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); + } +} diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index fedfcb980f..b372f020d7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -25,6 +25,8 @@ import org.apache.nemo.common.exception.BlockWriteException; import org.apache.nemo.runtime.executor.data.*; import org.apache.nemo.runtime.executor.data.block.Block; +import org.apache.nemo.runtime.executor.data.metadata.CrailFileMetadata; +import org.apache.nemo.runtime.executor.data.metadata.FileMetadata; import org.apache.nemo.runtime.executor.data.metadata.LocalFileMetadata; import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.apache.nemo.runtime.executor.data.metadata.RemoteFileMetadata; @@ -80,7 +82,7 @@ public Block createBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); //final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); - final LocalFileMetadata metadata = new LocalFileMetadata(); + final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From c68cf249cce30401c34be5226fbbdee89e5a5ee6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 11:10:25 +0900 Subject: [PATCH 054/235] logging --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index b372f020d7..a0e24a7d44 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -81,8 +81,9 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - //final RemoteFileMetadata metadata = RemoteFileMetadata.create(metaPath); + LOG.info("HY: Before entering metadata creation"); final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath); + LOG.info("HY: Finished creating metadata"); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } From 0a23ccf57416463b14ccbd2d660501fcdbc49aa4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 14:04:48 +0900 Subject: [PATCH 055/235] metadata write added --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index f89c42f1e5..56431d7938 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -118,8 +118,8 @@ private void writeToFile(final Iterable> serializedPartit LOG.info("HY: FileBlock writeToFile started"); //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); - for(final SerializedPartition serializedPartition : serializedPartitions){ + metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData()); } fileOutputStream.close(); From 44371db09e291bebb960fbee8c01dd37be0f9fd1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 14:13:36 +0900 Subject: [PATCH 056/235] logging --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 56431d7938..2b3a40eab0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -119,7 +119,7 @@ private void writeToFile(final Iterable> serializedPartit //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ - metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); + //metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData()); } fileOutputStream.close(); From 36cb4536766d4b5f14c15c72022a01fa07f9dedb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 14:51:25 +0900 Subject: [PATCH 057/235] block fetch --- .../org/apache/nemo/driver/NemoDriver.java | 13 ------- .../executor/data/block/FileBlock.java | 2 +- .../executor/data/stores/CrailFileStore.java | 34 ++++++++++++------- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index dba67c5f55..0b9c40045e 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -160,19 +160,6 @@ public void onNext(final StartTime startTime) { fs = CrailStore.newInstance(conf); LOG.info("creating main dir /tmp_crail"); - /* - try { - if(fs.lookup("/tmp_crail").get() != null){ - baseDirExists = true; - } - else{ - baseDirExists = false; - } - } catch (Exception e) { - LOG.info("fs.lookup failed"); - e.printStackTrace(); - } - */ try{ fs.delete("/tmp_crail", true).get().syncDir(); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 2b3a40eab0..56431d7938 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -119,7 +119,7 @@ private void writeToFile(final Iterable> serializedPartit //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ - //metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); + metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData()); } fileOutputStream.close(); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index a0e24a7d44..098d46a13e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -117,15 +117,22 @@ public void writeBlock(final Block block) throws BlockWriteException { public Optional readBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); - if (!new File(filePath).isFile()) { - return Optional.empty(); - } else { - try { - final FileBlock block = getBlockFromFile(blockId); - return Optional.of(block); - } catch (final IOException e) { - throw new BlockFetchException(e); + try { + if(fs.lookup(filePath)==null){ + return Optional.empty(); + } + else { + try { + final FileBlock block = getBlockFromFile(blockId); + return Optional.of(block); + } catch (final IOException e) { + throw new BlockFetchException(e); + } } + } catch (Exception e) { + LOG.info("HY: lookup failed during readBlock"); + e.printStackTrace(); + return null; } } @@ -136,7 +143,7 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio * @return whether the block exists or not. */ @Override - public boolean deleteBlock(final String blockId) throws BlockFetchException { + public boolean deleteBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { @@ -147,7 +154,8 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { } else { return false; } - } catch (final IOException e) { + } catch (final Exception e) { + LOG.info("HY: getBlockFromFile might have failed"); throw new BlockFetchException(e); } } @@ -163,11 +171,11 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { * @return the {@link FileBlock} gotten. * @throws IOException if fail to get. */ - private FileBlock getBlockFromFile(final String blockId) throws IOException { + private FileBlock getBlockFromFile(final String blockId) throws Exception { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); - final RemoteFileMetadata metadata = - RemoteFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + final CrailFileMetadata metadata = + CrailFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } } From b8dbfc03605402e8dc71174a18c71a641f558a5d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 15:16:24 +0900 Subject: [PATCH 058/235] exception handling --- .../executor/data/stores/CrailFileStore.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 098d46a13e..f2d54a43f0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -143,21 +143,27 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio * @return whether the block exists or not. */ @Override - public boolean deleteBlock(final String blockId) { + public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); + FileBlock block=null; try { - if (new File(filePath).isFile()) { - final FileBlock block = getBlockFromFile(blockId); - block.deleteFile(); + if (fs.lookup(filePath)==null) { + try { + block = getBlockFromFile(blockId); + block.deleteFile(); + } + catch(Exception e) { + LOG.info("HY: getBlockFromFile failed"); + } return true; } else { return false; } - } catch (final Exception e) { - LOG.info("HY: getBlockFromFile might have failed"); - throw new BlockFetchException(e); + } catch (Exception e) { + e.printStackTrace(); } + return false; } /** From 70f53d2e2955cf986db9d3aaa29038c3bc8084f0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 15:56:06 +0900 Subject: [PATCH 059/235] exception handling --- .../nemo/runtime/executor/data/block/FileBlock.java | 9 +++++++-- .../runtime/executor/data/stores/CrailFileStore.java | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 56431d7938..cb17c710ae 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -344,8 +344,13 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { */ public void deleteFile() throws IOException { metadata.deleteMetadata(); - if (new File(filePath).exists()) { - Files.delete(Paths.get(filePath)); + try { + if (fs.lookup(filePath) != null) { + fs.delete(filePath, true); + } + }catch (Exception e){ + LOG.info("HY: deleteFile failed"); + e.printStackTrace(); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index f2d54a43f0..ed0a68ca6c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -146,9 +146,8 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); FileBlock block=null; - try { - if (fs.lookup(filePath)==null) { + if (fs.lookup(filePath)!=null) { try { block = getBlockFromFile(blockId); block.deleteFile(); From 420f8c273a65c9cae70901932f5a00aa38a7b313 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 16:01:52 +0900 Subject: [PATCH 060/235] exception handling --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index ed0a68ca6c..a66f9e1595 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -159,10 +159,9 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ } else { return false; } - } catch (Exception e) { - e.printStackTrace(); + } catch (final Exception e) { + throw new BlockFetchException(e); } - return false; } /** From e17c9ca5af924c92c992145966b18588891983bc Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 16:21:55 +0900 Subject: [PATCH 061/235] fs.lookup usage edit --- .../executor/data/block/FileBlock.java | 10 +++--- .../data/metadata/CrailFileMetadata.java | 34 +++++++++++-------- .../executor/data/stores/CrailFileStore.java | 31 ++++++----------- 3 files changed, 35 insertions(+), 40 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index cb17c710ae..84b19f3326 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -345,10 +345,12 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { public void deleteFile() throws IOException { metadata.deleteMetadata(); try { - if (fs.lookup(filePath) != null) { - fs.delete(filePath, true); - } - }catch (Exception e){ + fs.lookup(filePath); + fs.delete(filePath, true); + }catch (IOException e){ + e.printStackTrace(); + } + catch (Exception e){ LOG.info("HY: deleteFile failed"); e.printStackTrace(); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index faa75473bd..f0df902365 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -145,28 +145,32 @@ public static CrailFileMetadata create(final String * @throws IOException if fail to open. */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ - if (fs.lookup(metaFilePath)==null){ + try { + fs.lookup(metaFilePath); + } + catch(Exception e){ throw new IOException("File " + metaFilePath + " does not exist!"); } - final List> partitionMetadataList = new ArrayList<>(); - try ( - CrailBufferedInputStream metaFileInputstream =file.getBufferedInputStream(0); - ) { - while (metaFileInputstream.available() > 0) { - final int keyLength = metaFileInputstream.readInt(); - final byte[] desKey = new byte[keyLength]; - if (keyLength != metaFileInputstream.read(desKey)) { - throw new IOException("Invalid key length!"); - } + final List> partitionMetadataList = new ArrayList<>(); + try ( + CrailBufferedInputStream metaFileInputstream = file.getBufferedInputStream(0); + ) { + while (metaFileInputstream.available() > 0) { + final int keyLength = metaFileInputstream.readInt(); + final byte[] desKey = new byte[keyLength]; + if (keyLength != metaFileInputstream.read(desKey)) { + throw new IOException("Invalid key length!"); + } - final PartitionMetadata partitionMetadata = new PartitionMetadata<>( + final PartitionMetadata partitionMetadata = new PartitionMetadata<>( SerializationUtils.deserialize(desKey), metaFileInputstream.readInt(), metaFileInputstream.readLong() - ); - partitionMetadataList.add(partitionMetadata); + ); + partitionMetadataList.add(partitionMetadata); + } } - } + return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index a66f9e1595..5f4228c736 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -118,21 +118,16 @@ public void writeBlock(final Block block) throws BlockWriteException { public Optional readBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - if(fs.lookup(filePath)==null){ - return Optional.empty(); - } - else { + fs.lookup(filePath); try { final FileBlock block = getBlockFromFile(blockId); return Optional.of(block); } catch (final IOException e) { throw new BlockFetchException(e); } - } } catch (Exception e) { - LOG.info("HY: lookup failed during readBlock"); e.printStackTrace(); - return null; + return Optional.empty(); } } @@ -145,22 +140,16 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio @Override public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); - FileBlock block=null; + try { - if (fs.lookup(filePath)!=null) { - try { - block = getBlockFromFile(blockId); - block.deleteFile(); - } - catch(Exception e) { - LOG.info("HY: getBlockFromFile failed"); - } - return true; - } else { - return false; - } - } catch (final Exception e) { + fs.lookup(filePath); + final FileBlock block = getBlockFromFile(blockId); + block.deleteFile(); + return false; + } catch (final IOException e) { throw new BlockFetchException(e); + } catch(Exception e){ + return false; } } From 4bba4663c390746b1add1f6b499402371f6b4cb6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 16:41:20 +0900 Subject: [PATCH 062/235] fs.lookup usage edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 5f4228c736..60dde0bdb5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -142,10 +142,10 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - fs.lookup(filePath); + fs.lookup(filePath).get().asFile(); final FileBlock block = getBlockFromFile(blockId); block.deleteFile(); - return false; + return true; } catch (final IOException e) { throw new BlockFetchException(e); } catch(Exception e){ From 78a8caeb95ded2181c5171cfa8441709641fa1ec Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 18:45:44 +0900 Subject: [PATCH 063/235] fs.lookup usage edit --- .../executor/data/block/FileBlock.java | 4 +- .../data/metadata/CrailFileMetadata.java | 40 +++++++++---------- .../executor/data/stores/CrailFileStore.java | 26 ++++++++---- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 84b19f3326..ea1f7fd3e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -345,8 +345,8 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { public void deleteFile() throws IOException { metadata.deleteMetadata(); try { - fs.lookup(filePath); - fs.delete(filePath, true); + if(fs.lookup(filePath).get()!=null) + fs.delete(filePath, true); }catch (IOException e){ e.printStackTrace(); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index f0df902365..0b618684b5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -145,32 +145,28 @@ public static CrailFileMetadata create(final String * @throws IOException if fail to open. */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ - try { - fs.lookup(metaFilePath); - } - catch(Exception e){ + if (fs.lookup(metaFilePath).get()==null) { throw new IOException("File " + metaFilePath + " does not exist!"); } - final List> partitionMetadataList = new ArrayList<>(); - try ( - CrailBufferedInputStream metaFileInputstream = file.getBufferedInputStream(0); - ) { - while (metaFileInputstream.available() > 0) { - final int keyLength = metaFileInputstream.readInt(); - final byte[] desKey = new byte[keyLength]; - if (keyLength != metaFileInputstream.read(desKey)) { - throw new IOException("Invalid key length!"); - } - - final PartitionMetadata partitionMetadata = new PartitionMetadata<>( - SerializationUtils.deserialize(desKey), - metaFileInputstream.readInt(), - metaFileInputstream.readLong() - ); - partitionMetadataList.add(partitionMetadata); + final List> partitionMetadataList = new ArrayList<>(); + try ( + final CrailBufferedInputStream dataInputStream = file.getBufferedInputStream(0); + ) { + while (dataInputStream.available() > 0) { + final int keyLength = dataInputStream.readInt(); + final byte[] desKey = new byte[keyLength]; + if (keyLength != dataInputStream.read(desKey)) { + throw new IOException("Invalid key length!"); } - } + final PartitionMetadata partitionMetadata = new PartitionMetadata<>( + SerializationUtils.deserialize(desKey), + dataInputStream.readInt(), + dataInputStream.readLong() + ); + partitionMetadataList.add(partitionMetadata); + } + } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 60dde0bdb5..875d4c3588 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -118,16 +118,22 @@ public void writeBlock(final Block block) throws BlockWriteException { public Optional readBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - fs.lookup(filePath); + if (fs.lookup(filePath).get()==null) { + return Optional.empty(); + } else { try { final FileBlock block = getBlockFromFile(blockId); return Optional.of(block); } catch (final IOException e) { throw new BlockFetchException(e); + } catch (Exception e){ + e.printStackTrace(); + throw new BlockFetchException(e); } + } } catch (Exception e) { e.printStackTrace(); - return Optional.empty(); + throw new BlockFetchException(e); } } @@ -142,14 +148,18 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - fs.lookup(filePath).get().asFile(); - final FileBlock block = getBlockFromFile(blockId); - block.deleteFile(); - return true; + if (fs.lookup(filePath).get()!=null) { + final FileBlock block = getBlockFromFile(blockId); + block.deleteFile(); + return true; + } else { + return false; + } } catch (final IOException e) { throw new BlockFetchException(e); - } catch(Exception e){ - return false; + } catch (Exception e){ + e.printStackTrace(); + throw new BlockFetchException(e); } } From 0b7e11ddd49a617a00afc68bc994f3b36cf788c9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 18:56:06 +0900 Subject: [PATCH 064/235] fs.lookup usage edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 875d4c3588..30ceddabcb 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -148,7 +148,8 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - if (fs.lookup(filePath).get()!=null) { + CrailFile file = fs.lookup(filePath).get().asFile(); file.syncDir(); + if (file!=null) { final FileBlock block = getBlockFromFile(blockId); block.deleteFile(); return true; From 98768268a0fe00de94dc0baee8e281581c7caed2 Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Sun, 17 Feb 2019 10:50:07 +0000 Subject: [PATCH 065/235] logging revert --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 30ceddabcb..1a8a13a779 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -146,8 +146,8 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio @Override public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); - - try { + + try { CrailFile file = fs.lookup(filePath).get().asFile(); file.syncDir(); if (file!=null) { final FileBlock block = getBlockFromFile(blockId); From fb7ae5cd5382422bbd0b6443d2682ca08dbca65f Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Feb 2019 20:22:50 +0900 Subject: [PATCH 066/235] fs.lookup usage edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 30ceddabcb..875d4c3588 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -148,8 +148,7 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - CrailFile file = fs.lookup(filePath).get().asFile(); file.syncDir(); - if (file!=null) { + if (fs.lookup(filePath).get()!=null) { final FileBlock block = getBlockFromFile(blockId); block.deleteFile(); return true; From 1dcde816b97d1b07959256cb18591873d6b64e4d Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Sun, 17 Feb 2019 11:24:00 +0000 Subject: [PATCH 067/235] edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 1a8a13a779..23c1e0a16a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -159,8 +159,7 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ } catch (final IOException e) { throw new BlockFetchException(e); } catch (Exception e){ - e.printStackTrace(); - throw new BlockFetchException(e); + return false; } } From e021878ee721ff7794c92537553f655e4c5924ae Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 13:18:21 +0900 Subject: [PATCH 068/235] logging --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 0b618684b5..057ec4a6d9 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -145,6 +145,7 @@ public static CrailFileMetadata create(final String * @throws IOException if fail to open. */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ + LOG.info("HY: metafilePath {}", metaFilePath); if (fs.lookup(metaFilePath).get()==null) { throw new IOException("File " + metaFilePath + " does not exist!"); } From c22fd4a4765f39d58ebe85db4ebb5b16192b6b45 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 13:55:30 +0900 Subject: [PATCH 069/235] metadata fetch --- .../executor/data/metadata/CrailFileMetadata.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 057ec4a6d9..3cb74afa70 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -57,8 +57,14 @@ private CrailFileMetadata(final String metaFilePath) { try { conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); - this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); + try { + this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + }catch (Exception e){ + //when it already exists + this.file = fs.lookup(metaFilePath).get().asFile(); + file.syncDir(); + } } catch(Exception e){ LOG.info("HY: CrailConfiguration failed"); From e4d5eb839cf4f7d643a76bf19f0c6e7efd2cf2a9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 14:03:34 +0900 Subject: [PATCH 070/235] metadata fetch --- .../runtime/executor/data/block/FileBlock.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index ea1f7fd3e5..0f16e5db71 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -97,9 +97,18 @@ public FileBlock(final String blockId, this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); LOG.info("HY: crail file block created"); - } catch (Exception e) { - LOG.info("HY: crail file block creation failed"); - e.printStackTrace(); + } catch (Exception e1) { + LOG.info("HY: crail file block creation might have failed"); + e1.printStackTrace(); + try{ + this.file = fs.lookup(filePath).get().asFile(); + file.syncDir(); + LOG.info("HY: {} fetched", blockId); + } + catch(Exception e2){ + LOG.info("HY: {} fetch failed"); + e2.printStackTrace(); + } } } } From 50a99ca0db98256c5b3010a53c5c41d7f5d17b14 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 16:45:10 +0900 Subject: [PATCH 071/235] exception handling --- .../nemo/runtime/executor/data/block/FileBlock.java | 4 ++-- .../runtime/executor/data/metadata/CrailFileMetadata.java | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 0f16e5db71..ced77813bd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -99,7 +99,7 @@ public FileBlock(final String blockId, LOG.info("HY: crail file block created"); } catch (Exception e1) { LOG.info("HY: crail file block creation might have failed"); - e1.printStackTrace(); + //e1.printStackTrace(); try{ this.file = fs.lookup(filePath).get().asFile(); file.syncDir(); @@ -107,7 +107,7 @@ public FileBlock(final String blockId, } catch(Exception e2){ LOG.info("HY: {} fetch failed"); - e2.printStackTrace(); + // e2.printStackTrace(); } } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 3cb74afa70..05ec127c88 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -152,8 +152,12 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); - if (fs.lookup(metaFilePath).get()==null) { - throw new IOException("File " + metaFilePath + " does not exist!"); + try { + if (fs.lookup(metaFilePath).get() == null) { + throw new IOException("File " + metaFilePath + " does not exist!"); + } + }catch(Exception e){ + throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } final List> partitionMetadataList = new ArrayList<>(); try ( From e698e98990c132342970a950c62c89252d93d842 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 17:10:53 +0900 Subject: [PATCH 072/235] readPartitions in Crail --- .../executor/data/block/FileBlock.java | 42 ++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index ced77813bd..78aa163cd6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -234,17 +234,37 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final FileInputStream fileStream = new FileInputStream(filePath)) { - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { - final K key = partitionMetadata.getKey(); - if (keyRange.includes(key)) { - // The key value of this partition is in the range. - final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - } else { - // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); + if (filePath.contains("crail")) { + try{ + final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0); + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + final K key = partitionMetadata.getKey(); + if (keyRange.includes(key)) { + // The key value of this partition is in the range. + final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + } else { + // Have to skip this partition. + skipBytes(fileStream, partitionMetadata.getPartitionSize()); + } + } + }catch(Exception e){ + e.printStackTrace(); + } + } else { + try (final FileInputStream fileStream = new FileInputStream(filePath)) { + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + final K key = partitionMetadata.getKey(); + if (keyRange.includes(key)) { + // The key value of this partition is in the range. + final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + } else { + // Have to skip this partition. + skipBytes(fileStream, partitionMetadata.getPartitionSize()); + } } } } From a10bf8ab9ab4726dc773b9634d7d97cacf2617fe Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 17:17:39 +0900 Subject: [PATCH 073/235] edit --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 78aa163cd6..df5621ed9f 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -101,6 +101,7 @@ public FileBlock(final String blockId, LOG.info("HY: crail file block creation might have failed"); //e1.printStackTrace(); try{ + this.fs = fs; this.file = fs.lookup(filePath).get().asFile(); file.syncDir(); LOG.info("HY: {} fetched", blockId); From 5b9f74abdbe63dfb7448318e69b152f12c6e7aa7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 19:23:25 +0900 Subject: [PATCH 074/235] edit --- .../executor/data/metadata/CrailFileMetadata.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 05ec127c88..726e1a42bc 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -152,12 +152,12 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); + CrailFile file=null; try { - if (fs.lookup(metaFilePath).get() == null) { - throw new IOException("File " + metaFilePath + " does not exist!"); - } - }catch(Exception e){ - throw new IOException("HY: File "+metaFilePath+ " does not exist!"); + file = fs.lookup(metaFilePath).get().asFile(); + file.syncDir(); + }catch (Exception e){ + LOG.info("HY: File "+metaFilePath+" not found!"); } final List> partitionMetadataList = new ArrayList<>(); try ( From 911ccf30183cf584b778daba5671e027a0718ff6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Feb 2019 19:37:00 +0900 Subject: [PATCH 075/235] edit --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 726e1a42bc..508b109e49 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.SerializationUtils; import org.apache.crail.*; import org.apache.crail.conf.CrailConfiguration; +import org.apache.nemo.common.exception.BlockFetchException; import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -157,7 +158,7 @@ public static CrailFileMetadata open(final String me file = fs.lookup(metaFilePath).get().asFile(); file.syncDir(); }catch (Exception e){ - LOG.info("HY: File "+metaFilePath+" not found!"); + throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } final List> partitionMetadataList = new ArrayList<>(); try ( From bbc31d67d4f4cfdb71c021ec1e19b854c479b777 Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Mon, 18 Feb 2019 20:11:08 +0900 Subject: [PATCH 076/235] handle already existing metadata + added tpch --- .../nemo/examples/beam/tpch/Schemas.java | 363 ++++++++++++++++++ .../apache/nemo/examples/beam/tpch/Tpch.java | 196 ++++++++++ .../data/metadata/CrailFileMetadata.java | 19 +- 3 files changed, 571 insertions(+), 7 deletions(-) create mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java create mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java new file mode 100644 index 0000000000..f9dbd285f6 --- /dev/null +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java @@ -0,0 +1,363 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nemo.examples.beam.tpch; + +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.schemas.Schema; + +/** + * A simple SQL application. + * (Copied and adapted from https://github.com/apache/beam/pull/6240) + */ +public final class Schemas { + /** + * Private. + */ + private Schemas() { + } + + public static final ImmutableMap COLUMN_PREFIX = ImmutableMap.builder() + .put("lineitem", "l_") + .put("customer", "c_") + .put("supplier", "s_") + .put("partsupp", "ps_") + .put("part", "p_") + .put("orders", "o_") + .put("nation", "n_") + .put("region", "r_") + .build(); + + public static final Schema STORE_SALES_SCHEMA = + Schema.builder() + .addNullableField("ss_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("ss_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("ss_item_sk", Schema.FieldType.INT64) + .addNullableField("ss_customer_sk", Schema.FieldType.STRING) + .addNullableField("ss_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ss_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ss_addr_sk", Schema.FieldType.INT64) + .addNullableField("ss_store_sk", Schema.FieldType.INT64) + .addNullableField("ss_promo_sk", Schema.FieldType.INT64) + .addNullableField("ss_ticket_number", Schema.FieldType.INT64) + .addNullableField("ss_quantity", Schema.FieldType.INT64) + .addNullableField("ss_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ss_list_price", Schema.FieldType.FLOAT) + .addNullableField("ss_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("ss_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("ss_net_paid", Schema.FieldType.FLOAT) + .addNullableField("ss_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("ss_net_profit", Schema.FieldType.FLOAT) + .build(); + + public static final Schema DATE_DIM_SCHEMA = + Schema.builder() + .addNullableField("d_date_sk", Schema.FieldType.INT64) + .addNullableField("d_date_id", Schema.FieldType.STRING) + .addNullableField("d_date", Schema.FieldType.STRING) + .addNullableField("d_month_seq", Schema.FieldType.INT64) + .addNullableField("d_week_seq", Schema.FieldType.INT64) + .addNullableField("d_quarter_seq", Schema.FieldType.INT64) + .addNullableField("d_year", Schema.FieldType.INT64) + .addNullableField("d_dow", Schema.FieldType.INT64) + .addNullableField("d_moy", Schema.FieldType.INT64) + .addNullableField("d_dom", Schema.FieldType.INT64) + .addNullableField("d_qoy", Schema.FieldType.INT64) + .addNullableField("d_fy_year", Schema.FieldType.INT64) + .addNullableField("d_fy_quarter_seq", Schema.FieldType.INT64) + .addNullableField("d_fy_week_seq", Schema.FieldType.INT64) + .addNullableField("d_day_name", Schema.FieldType.STRING) + .addNullableField("d_quarter_name", Schema.FieldType.STRING) + .addNullableField("d_holiday", Schema.FieldType.STRING) + .addNullableField("d_weekend", Schema.FieldType.STRING) + .addNullableField("d_following_holiday", Schema.FieldType.STRING) + .addNullableField("d_first_dom", Schema.FieldType.INT64) + .addNullableField("d_last_dom", Schema.FieldType.INT64) + .addNullableField("d_same_day_ly", Schema.FieldType.INT64) + .addNullableField("d_same_day_lq", Schema.FieldType.INT64) + .addNullableField("d_current_day", Schema.FieldType.STRING) + .addNullableField("d_current_week", Schema.FieldType.STRING) + .addNullableField("d_current_month", Schema.FieldType.STRING) + .addNullableField("d_current_quarter", Schema.FieldType.STRING) + .addNullableField("d_current_year", Schema.FieldType.STRING) + .build(); + + public static final Schema ITEM_SCHEMA = + Schema.builder() + .addNullableField("i_item_sk", Schema.FieldType.INT64) + .addNullableField("i_item_id", Schema.FieldType.STRING) + .addNullableField("i_rec_start_date", Schema.FieldType.DATETIME) + .addNullableField("i_rec_end_date", Schema.FieldType.DATETIME) + .addNullableField("i_item_desc", Schema.FieldType.STRING) + .addNullableField("i_current_price", Schema.FieldType.FLOAT) + .addNullableField("i_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("i_brand_id", Schema.FieldType.INT64) + .addNullableField("i_brand", Schema.FieldType.STRING) + .addNullableField("i_class_id", Schema.FieldType.INT64) + .addNullableField("i_class", Schema.FieldType.STRING) + .addNullableField("i_category_id", Schema.FieldType.INT64) + .addNullableField("i_category", Schema.FieldType.STRING) + .addNullableField("i_manufact_id", Schema.FieldType.INT64) + .addNullableField("i_manufact", Schema.FieldType.STRING) + .addNullableField("i_size", Schema.FieldType.STRING) + .addNullableField("i_formulation", Schema.FieldType.STRING) + .addNullableField("i_color", Schema.FieldType.STRING) + .addNullableField("i_units", Schema.FieldType.STRING) + .addNullableField("i_container", Schema.FieldType.STRING) + .addNullableField("i_manager_id", Schema.FieldType.INT64) + .addNullableField("i_product_name", Schema.FieldType.STRING) + .build(); + + public static final Schema INVENTORY_SCHEMA = + Schema.builder() + .addNullableField("inv_date_sk", Schema.FieldType.INT64) + .addNullableField("inv_item_sk", Schema.FieldType.INT64) + .addNullableField("inv_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("inv_quantity_on_hand", Schema.FieldType.INT64) + .build(); + + public static final Schema CATALOG_SALES_SCHEMA = + Schema.builder() + .addNullableField("cs_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("cs_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_date_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_customer_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_addr_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_customer_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_addr_sk", Schema.FieldType.INT64) + .addNullableField("cs_call_center_sk", Schema.FieldType.INT64) + .addNullableField("cs_catalog_page_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_mode_sk", Schema.FieldType.INT64) + .addNullableField("cs_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("cs_item_sk", Schema.FieldType.INT64) + .addNullableField("cs_promo_sk", Schema.FieldType.INT64) + .addNullableField("cs_order_number", Schema.FieldType.INT64) + .addNullableField("cs_quantity", Schema.FieldType.INT64) + .addNullableField("cs_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_list_price", Schema.FieldType.FLOAT) + .addNullableField("cs_sales_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_ship_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_ship", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_net_profit", Schema.FieldType.FLOAT) + .build(); + + public static final Schema ORDER_SCHEMA = + Schema.builder() + .addInt64Field("o_orderkey") + .addInt64Field("o_custkey") + .addStringField("o_orderstatus") + .addFloatField("o_totalprice") + .addStringField("o_orderdate") + .addStringField("o_orderpriority") + .addStringField("o_clerk") + .addInt64Field("o_shippriority") + .addStringField("o_comment") + .build(); + + public static final Schema CUSTOMER_SCHEMA = + Schema.builder() + .addInt64Field("c_custkey") + .addStringField("c_name") + .addStringField("c_address") + .addInt64Field("c_nationkey") + .addStringField("c_phone") + .addFloatField("c_acctbal") + .addStringField("c_mktsegment") + .addStringField("c_comment") + .build(); + + public static final Schema CUSTOMER_DS_SCHEMA = + Schema.builder() + .addNullableField("c_customer_sk", Schema.FieldType.INT64) + .addNullableField("c_customer_id", Schema.FieldType.STRING) + .addNullableField("c_current_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("c_current_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("c_current_addr_sk", Schema.FieldType.INT64) + .addNullableField("c_first_shipto_date_sk", Schema.FieldType.INT64) + .addNullableField("c_first_sales_date_sk", Schema.FieldType.INT64) + .addNullableField("c_salutation", Schema.FieldType.STRING) + .addNullableField("c_first_name", Schema.FieldType.STRING) + .addNullableField("c_last_name", Schema.FieldType.STRING) + .addNullableField("c_preferred_cust_flag", Schema.FieldType.STRING) + .addNullableField("c_birth_day", Schema.FieldType.INT64) + .addNullableField("c_birth_month", Schema.FieldType.INT64) + .addNullableField("c_birth_year", Schema.FieldType.INT64) + .addNullableField("c_birth_country", Schema.FieldType.STRING) + .addNullableField("c_login", Schema.FieldType.STRING) + .addNullableField("c_email_address", Schema.FieldType.STRING) + .addNullableField("c_last_review_date", Schema.FieldType.STRING) + .build(); + + public static final Schema LINEITEM_SCHEMA = + Schema.builder() + .addInt64Field("l_orderkey") + .addInt64Field("l_partkey") + .addInt64Field("l_suppkey") + .addInt64Field("l_linenumber") + .addFloatField("l_quantity") + .addFloatField("l_extendedprice") + .addFloatField("l_discount") + .addFloatField("l_tax") + .addStringField("l_returnflag") + .addStringField("l_linestatus") + .addStringField("l_shipdate") + .addStringField("l_commitdate") + .addStringField("l_receiptdate") + .addStringField("l_shipinstruct") + .addStringField("l_shipmode") + .addStringField("l_comment") + .build(); + + public static final Schema PARTSUPP_SCHEMA = + Schema.builder() + .addInt64Field("ps_partkey") + .addInt64Field("ps_suppkey") + .addInt64Field("ps_availqty") + .addFloatField("ps_supplycost") + .addStringField("ps_comment") + .build(); + + public static final Schema REGION_SCHEMA = + Schema.builder() + .addInt64Field("r_regionkey") + .addStringField("r_name") + .addStringField("r_comment") + .build(); + + public static final Schema SUPPLIER_SCHEMA = + Schema.builder() + .addInt64Field("s_suppkey") + .addStringField("s_name") + .addStringField("s_address") + .addInt64Field("s_nationkey") + .addStringField("s_phone") + .addFloatField("s_acctbal") + .addStringField("s_comment") + .build(); + + public static final Schema PART_SCHEMA = + Schema.builder() + .addInt64Field("p_partkey") + .addStringField("p_name") + .addStringField("p_mfgr") + .addStringField("p_brand") + .addStringField("p_type") + .addInt64Field("p_size") + .addStringField("p_container") + .addFloatField("p_retailprice") + .addStringField("p_comment") + .build(); + + public static final Schema NATION_SCHEMA = + Schema.builder() + .addInt64Field("n_nationkey") + .addStringField("n_name") + .addInt64Field("n_regionkey") + .addStringField("n_comment") + .build(); + + public static final Schema PROMOTION_SCHEMA = + Schema.builder() + .addNullableField("p_promo_sk", Schema.FieldType.INT64) + .addNullableField("p_promo_id", Schema.FieldType.STRING) + .addNullableField("p_start_date_sk", Schema.FieldType.INT64) + .addNullableField("p_end_date_sk", Schema.FieldType.INT64) + .addNullableField("p_item_sk", Schema.FieldType.INT64) + .addNullableField("p_cost", Schema.FieldType.FLOAT) + .addNullableField("p_response_target", Schema.FieldType.INT64) + .addNullableField("p_promo_name", Schema.FieldType.STRING) + .addNullableField("p_channel_dmail", Schema.FieldType.STRING) + .addNullableField("p_channel_email", Schema.FieldType.STRING) + .addNullableField("p_channel_catalog", Schema.FieldType.STRING) + .addNullableField("p_channel_tv", Schema.FieldType.STRING) + .addNullableField("p_channel_radio", Schema.FieldType.STRING) + .addNullableField("p_channel_press", Schema.FieldType.STRING) + .addNullableField("p_channel_event", Schema.FieldType.STRING) + .addNullableField("p_channel_demo", Schema.FieldType.STRING) + .addNullableField("p_channel_details", Schema.FieldType.STRING) + .addNullableField("p_purpose", Schema.FieldType.STRING) + .addNullableField("p_discount_active", Schema.FieldType.STRING) + .build(); + + public static final Schema CUSTOMER_DEMOGRAPHIC_SCHEMA = + Schema.builder() + .addNullableField("cd_demo_sk", Schema.FieldType.INT64) + .addNullableField("cd_gender", Schema.FieldType.STRING) + .addNullableField("cd_marital_status", Schema.FieldType.STRING) + .addNullableField("cd_education_status", Schema.FieldType.STRING) + .addNullableField("cd_purchase_estimate", Schema.FieldType.INT64) + .addNullableField("cd_credit_rating", Schema.FieldType.STRING) + .addNullableField("cd_dep_count", Schema.FieldType.INT64) + .addNullableField("cd_dep_employed_count", Schema.FieldType.INT64) + .addNullableField("cd_dep_college_count", Schema.FieldType.INT64) + .build(); + + public static final Schema WEB_SALES_SCHEMA = + Schema.builder() + .addNullableField("ws_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("ws_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_date_sk", Schema.FieldType.INT64) + .addNullableField("ws_item_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_customer_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_addr_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_customer_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_addr_sk", Schema.FieldType.INT64) + .addNullableField("ws_web_page_sk", Schema.FieldType.INT64) + .addNullableField("ws_web_site_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_mode_sk", Schema.FieldType.INT64) + .addNullableField("ws_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("ws_promo_sk", Schema.FieldType.INT64) + .addNullableField("ws_order_number", Schema.FieldType.INT64) + .addNullableField("ws_quantity", Schema.FieldType.INT64) + .addNullableField("ws_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_list_price", Schema.FieldType.FLOAT) + .addNullableField("ws_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_ship_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_ship", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_net_profit", Schema.FieldType.FLOAT) + .build(); +} diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java new file mode 100644 index 0000000000..6134df334e --- /dev/null +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nemo.examples.beam.tpch; + +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.extensions.sql.SqlTransform; +import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.values.*; +import org.apache.commons.csv.CSVFormat; +import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; +import org.apache.nemo.compiler.frontend.beam.NemoRunner; +import org.apache.nemo.examples.beam.GenericSourceSink; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Stream; + +import static org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.beamRow2CsvLine; + +/** + * A simple SQL application. + * (Copied and adapted from https://github.com/apache/beam/pull/6240) + */ +public final class Tpch { + private static final Logger LOG = LoggerFactory.getLogger(Tpch.class.getName()); + + /** + * Private Constructor. + */ + private Tpch() { + } + + /** + * Row csv formats. + */ + static class RowToCsv extends PTransform, PCollection> implements Serializable { + + private final CSVFormat csvFormat; + + RowToCsv(final CSVFormat csvFormat) { + this.csvFormat = csvFormat; + } + + public CSVFormat getCsvFormat() { + return csvFormat; + } + + @Override + public PCollection expand(final PCollection input) { + return input.apply( + "rowToCsv", + MapElements.into(TypeDescriptors.strings()).via(row -> beamRow2CsvLine(row, csvFormat))); + } + } + + private static PCollectionTuple getHTables(final Pipeline pipeline, + final CSVFormat csvFormat, + final String inputDirectory, + final String query) { + final ImmutableMap hSchemas = ImmutableMap.builder() + .put("lineitem", Schemas.LINEITEM_SCHEMA) + .put("customer", Schemas.CUSTOMER_SCHEMA) + .put("orders", Schemas.ORDER_SCHEMA) + + .put("supplier", Schemas.SUPPLIER_SCHEMA) + .put("nation", Schemas.NATION_SCHEMA) + .put("region", Schemas.REGION_SCHEMA) + + .put("part", Schemas.PART_SCHEMA) + .put("partsupp", Schemas.PARTSUPP_SCHEMA) + /* + .put("store_sales", Schemas.STORE_SALES_SCHEMA) + .put("catalog_sales", Schemas.CATALOG_SALES_SCHEMA) + .put("item", Schemas.ITEM_SCHEMA) + .put("date_dim", Schemas.DATE_DIM_SCHEMA) + .put("promotion", Schemas.PROMOTION_SCHEMA) + .put("customer_demographics", Schemas.CUSTOMER_DEMOGRAPHIC_SCHEMA) + .put("web_sales", Schemas.WEB_SALES_SCHEMA) + .put("inventory", Schemas.INVENTORY_SCHEMA) + */ + .build(); + + PCollectionTuple tables = PCollectionTuple.empty(pipeline); + for (final Map.Entry tableSchema : hSchemas.entrySet()) { + final String tableName = tableSchema.getKey(); + + if (query.contains(tableName)) { + LOG.info("HIT: tablename {}", tableName); + + final String filePattern = inputDirectory + tableSchema.getKey() + ".tbl*"; + final PCollection table = GenericSourceSink.read(pipeline, filePattern) + .apply("StringToRow", new TextTableProvider.CsvToRow(tableSchema.getValue(), csvFormat)) + .setCoder(tableSchema.getValue().getRowCoder()) + .setName(tableSchema.getKey()); + tables = tables.and(new TupleTag<>(tableSchema.getKey()), table); + + LOG.info("FilePattern {} / Tables {}", filePattern, tables); + } + } + return tables; + } + + + /** + * @param args arguments. + */ + public static void main(final String[] args) { + final String queryFilePath = args[0]; + final String inputDirectory = args[1]; + final String outputFilePath = args[2]; + + LOG.info("{} / {} / {}", queryFilePath, inputDirectory, outputFilePath); + + final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class); + options.setRunner(NemoRunner.class); + options.setJobName("TPC-H"); + final Pipeline p = Pipeline.create(options); + + final String queryString = getQueryString(queryFilePath); + // Create tables + final CSVFormat csvFormat = CSVFormat.MYSQL + .withDelimiter('|') + .withNullString("") + .withTrailingDelimiter(); + final PCollectionTuple tables = getHTables(p, csvFormat, inputDirectory, queryString); + + // Run the TPC-H query + final PCollection result = tables.apply(SqlTransform.query(queryString)); + + final PCollection resultToWrite = result.apply(MapElements.into(TypeDescriptors.strings()).via( + new SerializableFunction() { + @Override + public String apply(final Row input) { + System.out.println(input.getValues().toString()); + return input.getValues().toString(); + } + })); + + GenericSourceSink.write(resultToWrite, outputFilePath); + + // Then run + p.run(); + } + + private static String getQueryString(final String queryFilePath) { + final List lines = new ArrayList<>(); + try (final Stream stream = Files.lines(Paths.get(queryFilePath))) { + stream.forEach(lines::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + System.out.println(lines); + + final StringBuilder sb = new StringBuilder(); + lines.forEach(line -> { + sb.append(" "); + sb.append(line); + }); + + final String concate = sb.toString(); + System.out.println(concate); + final String cleanOne = concate.replaceAll("\n", " "); + System.out.println(cleanOne); + final String cleanTwo = cleanOne.replaceAll("\t", " "); + System.out.println(cleanTwo); + + return cleanTwo; + } +} diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 05ec127c88..90557594b3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -60,7 +60,7 @@ private CrailFileMetadata(final String metaFilePath) { try { this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); file.syncDir(); - }catch (Exception e){ + } catch (Exception e){ //when it already exists this.file = fs.lookup(metaFilePath).get().asFile(); file.syncDir(); @@ -83,12 +83,17 @@ private CrailFileMetadata(final String metaFilePath, super(partitionMetadataList); this.metaFilePath = metaFilePath; try { - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); - this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - } - catch(Exception e){ + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } catch (Exception e) { + //when it already exists + this.file = fs.lookup(metaFilePath).get().asFile(); + file.syncDir(); + } + } catch(Exception e){ LOG.info("HY: CrailConfiguration failed"); e.printStackTrace(); } From aaeb8dc1ab422758f8a454fb9c7d68d3cd128657 Mon Sep 17 00:00:00 2001 From: Jeongyoon Eo Date: Mon, 18 Feb 2019 20:50:31 +0900 Subject: [PATCH 077/235] metadata path as local dir --- .../org/apache/nemo/examples/beam/GenericSourceSink.java | 2 +- .../org/apache/nemo/runtime/executor/data/DataUtil.java | 3 ++- .../runtime/executor/data/metadata/CrailFileMetadata.java | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java index 2ab09a7f32..3ebcb4cef2 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java @@ -42,7 +42,7 @@ * Helper class for handling source/sink in a generic way. * Assumes String-type PCollections. */ -final class GenericSourceSink { +public final class GenericSourceSink { /** * Default Constructor. */ diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 80e83df4e2..36182b8d5c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -177,7 +177,8 @@ public static String blockIdToFilePath(final String blockId, */ public static String blockIdToMetaFilePath(final String blockId, final String fileDirectory) { - return fileDirectory + "/" + blockId + "_meta"; + //return fileDirectory + "/" + blockId + "_meta"; + return "../crail_meta_dir" + blockId; } /** diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 68e2feda03..5cdcafcc15 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -55,6 +55,7 @@ public final class CrailFileMetadata extends FileMetadat private CrailFileMetadata(final String metaFilePath) { super(); this.metaFilePath = metaFilePath; + /* try { conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); @@ -71,6 +72,7 @@ private CrailFileMetadata(final String metaFilePath) { LOG.info("HY: CrailConfiguration failed"); e.printStackTrace(); } + */ } /** @@ -83,6 +85,7 @@ private CrailFileMetadata(final String metaFilePath, final List> partitionMetadataList) { super(partitionMetadataList); this.metaFilePath = metaFilePath; + /* try { try { conf = new CrailConfiguration(); @@ -98,6 +101,7 @@ private CrailFileMetadata(final String metaFilePath, LOG.info("HY: CrailConfiguration failed"); e.printStackTrace(); } + */ } /** @@ -158,11 +162,11 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); - CrailFile file=null; + CrailFile file; try { file = fs.lookup(metaFilePath).get().asFile(); file.syncDir(); - }catch (Exception e){ + } catch (Exception e) { throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } final List> partitionMetadataList = new ArrayList<>(); From e28b41c1152256b4aba9b7637532c939660bd772 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 20 Feb 2019 18:24:35 +0900 Subject: [PATCH 078/235] logging delete --- .../apache/nemo/client/ClientEndpointTest.java | 4 ++-- .../optimizer/policy/DisaggregationPolicy.java | 2 +- .../executor/data/stores/CrailFileStore.java | 2 -- .../nemo/runtime/master/PlanStateManager.java | 8 +------- .../master/scheduler/BatchScheduler.java | 18 +++++------------- 5 files changed, 9 insertions(+), 25 deletions(-) diff --git a/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java b/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java index 2c54ef3542..1a12968673 100644 --- a/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java +++ b/client/src/test/java/org/apache/nemo/client/ClientEndpointTest.java @@ -77,8 +77,8 @@ public void testState() throws Exception { final List tasks = physicalPlan.getStageDAG().getTopologicalSort().stream() .flatMap(stage -> planStateManager.getTaskAttemptsToSchedule(stage.getId()).stream()) .collect(Collectors.toList()); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING, 0)); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE, 0)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE)); assertEquals(PlanState.State.COMPLETE, clientEndpoint.waitUntilJobFinish()); } diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java index 12e620e25b..c7cc8667f1 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/DisaggregationPolicy.java @@ -33,7 +33,7 @@ public final class DisaggregationPolicy implements Policy { public static final PolicyBuilder BUILDER = new PolicyBuilder() - .registerCompileTimePass(new DisaggregationEdgeDataStorePass()) //***확인 + .registerCompileTimePass(new DisaggregationEdgeDataStorePass()) .registerCompileTimePass(new LoopOptimizationCompositePass()) .registerCompileTimePass(new DefaultCompositePass()); private final Policy policy; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 7cfe2a0c9c..b596230a98 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -81,9 +81,7 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - LOG.info("HY: Before entering metadata creation"); final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath); - LOG.info("HY: Finished creating metadata"); return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } diff --git a/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java b/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java index 0f0ee38bae..8b9ec3a69d 100644 --- a/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java +++ b/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java @@ -298,7 +298,7 @@ public synchronized boolean setNumOfClones(final String stageId, final int taskI * @param taskId the ID of the task. * @param newTaskState the new state of the task. */ - public synchronized void onTaskStateChanged(final String taskId, final TaskState.State newTaskState, long time) { + public synchronized void onTaskStateChanged(final String taskId, final TaskState.State newTaskState) { // Change task state final StateMachine taskState = getTaskStateHelper(taskId).getStateMachine(); @@ -329,13 +329,8 @@ public synchronized void onTaskStateChanged(final String taskId, final TaskState }) .count(); if (newTaskState.equals(TaskState.State.COMPLETE)) { - //Task 완료한 상태 LOG.info("{} completed: {} Task(s) out of {} are remaining in this stage", taskId, taskStatesOfThisStage.size() - numOfCompletedTaskIndicesInThisStage, taskStatesOfThisStage.size()); - if(taskStatesOfThisStage.size()-numOfCompletedTaskIndicesInThisStage==0){ - LOG.info("Stage start time: {}", time); - LOG.info("time consumed for the stage: {}", System.nanoTime() - time); - } } // Maintain info for speculative execution @@ -377,7 +372,6 @@ public synchronized void onTaskStateChanged(final String taskId, final TaskState } } - /** * (PRIVATE METHOD) * Updates the state of a stage. diff --git a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/BatchScheduler.java b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/BatchScheduler.java index 744bd26119..b80a2ce1e7 100644 --- a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/BatchScheduler.java +++ b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/BatchScheduler.java @@ -81,14 +81,9 @@ public final class BatchScheduler implements Scheduler { /** * The below variables depend on the submitted plan to execute. */ - private List> sortedScheduleGroups; //스케줄러 그룹이 리스트로 이어져있고, 하나의 그룹 안에서 여러 스테이지가 있을 수 있으므로? + private List> sortedScheduleGroups; private List dynOptDataHandlers; - /** - * Variable for logging the time consumed for each stage - * */ - long stageStartNano; - @Inject private BatchScheduler(final TaskDispatcher taskDispatcher, final PendingTaskCollectionPointer pendingTaskCollectionPointer, @@ -184,7 +179,7 @@ public void onTaskStateReportFromExecutor(final String executorId, @Nullable final String vertexPutOnHold, final TaskState.RecoverableTaskFailureCause failureCause) { // Do change state, as this notification is for the current task attempt. - planStateManager.onTaskStateChanged(taskId, newState, stageStartNano); + planStateManager.onTaskStateChanged(taskId, newState); switch (newState) { case COMPLETE: onTaskExecutionComplete(executorId, taskId); @@ -342,9 +337,6 @@ private void doSchedule() { .flatMap(stage -> selectSchedulableTasks(stage).stream()) .collect(Collectors.toList()); if (!tasksToSchedule.isEmpty()) { - //ScheduleGroup에 stage가 하나만 있을 때는 이 시점이 stage들을 시작하는 시점이라고 할 수 있음 - if(tasksToSchedule.size()==1) stageStartNano = System.nanoTime(); //** - LOG.info("Scheduling some tasks in {}, which are in the same ScheduleGroup", tasksToSchedule.stream() .map(Task::getTaskId) .map(RuntimeIdManager::getStageIdFromTaskId) @@ -378,8 +370,8 @@ private List selectSchedulableTasks(final Stage stageToSchedule) { if (stageToSchedule.getPropertyValue(IgnoreSchedulingTempDataReceiverProperty.class).orElse(false)) { // Ignore ghost stage. for (final String taskId : planStateManager.getTaskAttemptsToSchedule(stageToSchedule.getId())) { - planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING, stageStartNano); - planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE, stageStartNano); + planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING); + planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE); } return Collections.emptyList(); @@ -538,7 +530,7 @@ private void retryTasksAndRequiredParents(final Set tasks) { final Set tasksToRetry = Sets.union(tasks, requiredParents); LOG.info("Will be retried: {}", tasksToRetry); tasksToRetry.forEach( - taskToReExecute -> planStateManager.onTaskStateChanged(taskToReExecute, TaskState.State.SHOULD_RETRY, stageStartNano)); + taskToReExecute -> planStateManager.onTaskStateChanged(taskToReExecute, TaskState.State.SHOULD_RETRY)); } private Set recursivelyGetParentTasksForLostBlocks(final Set children) { From 0a7440675aedf2be0d8c55011a9674485ecb6922 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 20 Feb 2019 18:28:48 +0900 Subject: [PATCH 079/235] logging delete --- .../nemo/runtime/executor/data/stores/LocalFileStore.java | 3 --- .../org/apache/nemo/runtime/master/PlanStateManager.java | 2 -- .../nemo/runtime/master/scheduler/StreamingScheduler.java | 2 +- .../nemo/runtime/master/scheduler/TaskDispatcher.java | 2 +- .../apache/nemo/runtime/master/PlanStateManagerTest.java | 8 ++++---- 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java index 44cf01d1b2..477fd9be45 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java @@ -41,7 +41,6 @@ @ThreadSafe public final class LocalFileStore extends LocalBlockStore { private final String fileDirectory; - private static final Logger LOG = LoggerFactory.getLogger(TaskExecutor.class.getName()); /** * Constructor. * @@ -80,9 +79,7 @@ public void writeBlock(final Block block) throws BlockWriteException { } else if (!block.isCommitted()) { throw new BlockWriteException(new Throwable("The block " + block.getId() + "is not committed yet.")); } else { - LOG.info("HY: writeBlock no.{}", block.getId()); getBlockMap().put(block.getId(), block); - //여기에서 블럭을 write?? to LocalFileStore?? 아닐수도,,, } } diff --git a/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java b/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java index 8b9ec3a69d..5da9466f45 100644 --- a/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java +++ b/runtime/master/src/main/java/org/apache/nemo/runtime/master/PlanStateManager.java @@ -301,8 +301,6 @@ public synchronized boolean setNumOfClones(final String stageId, final int taskI public synchronized void onTaskStateChanged(final String taskId, final TaskState.State newTaskState) { // Change task state final StateMachine taskState = getTaskStateHelper(taskId).getStateMachine(); - - LOG.debug("Task State Transition: id {}, from {} to {}", new Object[]{taskId, taskState.getCurrentState(), newTaskState}); metricStore.getOrCreateMetric(TaskMetric.class, taskId) diff --git a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/StreamingScheduler.java b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/StreamingScheduler.java index d78a56a6be..71f1da290e 100644 --- a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/StreamingScheduler.java +++ b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/StreamingScheduler.java @@ -123,7 +123,7 @@ public void onTaskStateReportFromExecutor(final String executorId, final TaskState.State newState, @Nullable final String vertexPutOnHold, final TaskState.RecoverableTaskFailureCause failureCause) { - planStateManager.onTaskStateChanged(taskId, newState, 0); + planStateManager.onTaskStateChanged(taskId, newState); switch (newState) { case COMPLETE: diff --git a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/TaskDispatcher.java b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/TaskDispatcher.java index 7fd852cccc..c50f1e9ddf 100644 --- a/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/TaskDispatcher.java +++ b/runtime/master/src/main/java/org/apache/nemo/runtime/master/scheduler/TaskDispatcher.java @@ -130,7 +130,7 @@ private void doScheduleTaskList() { final ExecutorRepresenter selectedExecutor = schedulingPolicy.selectExecutor(candidateExecutors.getValue(), task); // update metadata first - planStateManager.onTaskStateChanged(task.getTaskId(), TaskState.State.EXECUTING, 0); + planStateManager.onTaskStateChanged(task.getTaskId(), TaskState.State.EXECUTING); LOG.info("{} scheduled to {}", task.getTaskId(), selectedExecutor.getExecutorId()); // send the task diff --git a/runtime/master/src/test/java/org/apache/nemo/runtime/master/PlanStateManagerTest.java b/runtime/master/src/test/java/org/apache/nemo/runtime/master/PlanStateManagerTest.java index 44ac79897e..f2339cf617 100644 --- a/runtime/master/src/test/java/org/apache/nemo/runtime/master/PlanStateManagerTest.java +++ b/runtime/master/src/test/java/org/apache/nemo/runtime/master/PlanStateManagerTest.java @@ -80,8 +80,8 @@ public void testPhysicalPlanStateChanges() throws Exception { final Stage stage = stageList.get(stageIdx); final List taskIds = planStateManager.getTaskAttemptsToSchedule(stage.getId()); taskIds.forEach(taskId -> { - planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING, 0); - planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE, 0); + planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING); + planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE); if (RuntimeIdManager.getIndexFromTaskId(taskId) == taskIds.size() - 1) { assertEquals(StageState.State.COMPLETE, planStateManager.getStageState(stage.getId())); } @@ -115,8 +115,8 @@ public void testWaitUntilFinish() throws Exception { final List tasks = physicalPlan.getStageDAG().getTopologicalSort().stream() .flatMap(stage -> planStateManager.getTaskAttemptsToSchedule(stage.getId()).stream()) .collect(Collectors.toList()); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING, 0)); - tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE, 0)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.EXECUTING)); + tasks.forEach(taskId -> planStateManager.onTaskStateChanged(taskId, TaskState.State.COMPLETE)); final PlanState.State completedState = planStateManager.waitUntilFinish(); assertEquals(PlanState.State.COMPLETE, completedState); } From f5a1edbcd6407779e7e5091716e32b0c2ecc3043 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 20 Feb 2019 18:33:29 +0900 Subject: [PATCH 080/235] logging delete --- .../nemo/examples/beam/WordCountITCase.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java index 97bec8156b..3a46e82b87 100644 --- a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java +++ b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java @@ -132,26 +132,6 @@ public void testSpeculativeExecution() throws Exception { .build()); } - @Test (timeout = ExampleTestArgs.TIMEOUT) - public void testDisaggregationPolicy() throws Exception{ - JobLauncher.main(builder - .addResourceJson(executorResourceFileName) - .addJobId(WordCountITCase.class.getSimpleName() + " DisaggregationPolicy") - .addMaxTaskAttempt(Integer.MAX_VALUE) - .addOptimizationPolicy(DisaggregationPolicy.class.getCanonicalName()) - .build()); - } - - @Test (timeout = ExampleTestArgs.TIMEOUT) - public void testDefaultPolicy() throws Exception{ - JobLauncher.main(builder - .addResourceJson(executorResourceFileName) - .addJobId(WordCountITCase.class.getSimpleName() + " DefaultPolicy") - .addMaxTaskAttempt(Integer.MAX_VALUE) - .addOptimizationPolicy(DefaultPolicy.class.getCanonicalName()) - .build()); - } - @Test (timeout = ExampleTestArgs.TIMEOUT) public void testCrailPolicy() throws Exception{ JobLauncher.main(builder From b48869d147aec7cd5d6b6af612fee0e9f57d28fd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Feb 2019 13:57:50 +0900 Subject: [PATCH 081/235] CrailFileBlock created --- .../executor/data/block/CrailFileBlock.java | 403 ++++++++++++++++++ .../executor/data/stores/CrailFileStore.java | 15 +- 2 files changed, 411 insertions(+), 7 deletions(-) create mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java new file mode 100644 index 0000000000..b94a1ad953 --- /dev/null +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -0,0 +1,403 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.nemo.runtime.executor.data.block; + +import org.apache.crail.*; +import org.apache.nemo.common.KeyRange; +import org.apache.nemo.common.Pair; +import org.apache.nemo.common.exception.BlockFetchException; +import org.apache.nemo.common.exception.BlockWriteException; +import org.apache.nemo.runtime.executor.data.DataUtil; +import org.apache.nemo.runtime.executor.data.FileArea; +import org.apache.nemo.runtime.executor.data.metadata.FileMetadata; +import org.apache.nemo.runtime.executor.data.metadata.PartitionMetadata; +import org.apache.nemo.runtime.executor.data.partition.NonSerializedPartition; +import org.apache.nemo.runtime.executor.data.partition.Partition; +import org.apache.nemo.runtime.executor.data.partition.SerializedPartition; +import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; +import java.io.*; +import java.util.*; + +/** + * This class represents a block which is stored in (local or remote) file. + * Concurrent read is supported, but concurrent write is not supported. + * + * @param the key type of its partitions. + */ +@NotThreadSafe +public final class CrailFileBlock implements Block { + private static final Logger LOG = LoggerFactory.getLogger(CrailFileBlock.class.getName()); + private final String id; + private final Map> nonCommittedPartitionsMap; + private final Serializer serializer; + private final String filePath; + private final FileMetadata metadata; + CrailStore fs = null; + CrailFile file = null; + + /** + * Constructor. + * + * @param blockId the ID of this block. + * @param serializer the {@link Serializer}. + * @param filePath the path of the file that this block will be stored. + * @param metadata the metadata for this block. + * @param fs CrailStore instance of the Crail storage. + */ + + public CrailFileBlock(final String blockId, + final Serializer serializer, + final String filePath, + final FileMetadata metadata, + CrailStore fs) { + this.id = blockId; + this.nonCommittedPartitionsMap = new HashMap<>(); + this.serializer = serializer; + this.filePath = filePath; + this.metadata = metadata; + try { + LOG.info("HY: FileBlock entered"); + this.fs = fs; + this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + LOG.info("HY: crail file block created"); + } catch (Exception e1) { + LOG.info("HY: crail file block creation might have failed"); + try{ + this.fs = fs; + this.file = fs.lookup(filePath).get().asFile(); + file.syncDir(); + LOG.info("HY: {} fetched", blockId); + } + catch(Exception e2){ + LOG.info("HY: {} fetch failed"); + } + } + } + + /** + * Writes the serialized data of this block having a specific key value as a partition to the file + * where this block resides. + * Invariant: This method does not support concurrent write. + * + * @param serializedPartitions the iterable of the serialized partitions to write. + * @throws IOException if fail to write. + */ + private void writeToFile(final Iterable> serializedPartitions) throws Exception { + final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); + for(final SerializedPartition serializedPartition : serializedPartitions){ + metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); + fileOutputStream.write(serializedPartition.getData()); + } + fileOutputStream.close(); + } + /** + * Writes an element to non-committed block. + * Invariant: This should not be invoked after this block is committed. + * Invariant: This method does not support concurrent write. + * + * @param key the key. + * @param element the element to write. + * @throws BlockWriteException for any error occurred while trying to write a block. + */ + @Override + public void write(final K key, + final Object element) throws BlockWriteException { + if (metadata.isCommitted()) { + throw new BlockWriteException(new Throwable("The partition is already committed!")); + } else { + try { + SerializedPartition partition = nonCommittedPartitionsMap.get(key); + if (partition == null) { + partition = new SerializedPartition<>(key, serializer); + nonCommittedPartitionsMap.put(key, partition); + } + partition.write(element); + } catch (final IOException e) { + throw new BlockWriteException(e); + } + } + } + + /** + * Writes {@link NonSerializedPartition}s to this block. + * Invariant: This method does not support concurrent write. + * + * @param partitions the {@link NonSerializedPartition}s to write. + * @throws BlockWriteException for any error occurred while trying to write a block. + */ + @Override + public void writePartitions(final Iterable> partitions) + throws BlockWriteException { + if (metadata.isCommitted()) { + throw new BlockWriteException(new Throwable("The partition is already committed!")); + } else { + try { + final Iterable> convertedPartitions = + DataUtil.convertToSerPartitions(serializer, partitions); + writeSerializedPartitions(convertedPartitions); + } catch (final IOException e) { + throw new BlockWriteException(e); + } + } + } + + /** + * Writes {@link SerializedPartition}s to this block. + * Invariant: This method does not support concurrent write. + * + * @param partitions the {@link SerializedPartition}s to store. + * @throws BlockWriteException for any error occurred while trying to write a block. + */ + @Override + public void writeSerializedPartitions(final Iterable> partitions) + throws BlockWriteException { + if (metadata.isCommitted()) { + throw new BlockWriteException(new Throwable("The partition is already committed!")); + } else { + try { + writeToFile(partitions); + } catch (final IOException e) { + throw new BlockWriteException(e); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + /** + * Retrieves the partitions of this block from the file in a specific key range and deserializes it. + * + * @param keyRange the key range. + * @return an iterable of {@link NonSerializedPartition}s. + * @throws BlockFetchException for any error occurred while trying to fetch a block. + */ + @Override + public Iterable> readPartitions(final KeyRange keyRange) throws BlockFetchException { + if (!metadata.isCommitted()) { + throw new BlockFetchException(new Throwable("Cannot retrieve elements before a block is committed")); + } else { + // Deserialize the data + final List> deserializedPartitions = new ArrayList<>(); + try { + final List> partitionKeyBytesPairs = new ArrayList<>(); + try{ + final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0); + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + final K key = partitionMetadata.getKey(); + if (keyRange.includes(key)) { + // The key value of this partition is in the range. + final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + } else { + // Have to skip this partition. + skipBytes(fileStream, partitionMetadata.getPartitionSize()); + } + } + }catch(Exception e){ + e.printStackTrace(); + } + for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { + final NonSerializedPartition deserializePartition = + DataUtil.deserializePartition( + partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), + new ByteArrayInputStream(partitionKeyBytes.right())); + deserializedPartitions.add(deserializePartition); + } + } catch (final IOException e) { + throw new BlockFetchException(e); + } + + return deserializedPartitions; + } + } + + /** + * Retrieves the {@link SerializedPartition}s in a specific key range. + * Invariant: This should not be invoked before this block is committed. + * + * @param keyRange the key range to retrieve. + * @return an iterable of {@link SerializedPartition}s. + * @throws BlockFetchException for any error occurred while trying to fetch a block. + */ + @Override + public Iterable> readSerializedPartitions(final KeyRange keyRange) throws BlockFetchException { + if (!metadata.isCommitted()) { + throw new BlockFetchException(new Throwable("Cannot retrieve elements before a block is committed")); + } else { + // Deserialize the data + final List> partitionsInRange = new ArrayList<>(); + try { + try (final FileInputStream fileStream = new FileInputStream(filePath)) { + for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { + final K key = partitionmetadata.getKey(); + if (keyRange.includes(key)) { + // The hash value of this partition is in the range. + final byte[] serializedData = new byte[partitionmetadata.getPartitionSize()]; + final int readBytes = fileStream.read(serializedData); + if (readBytes != serializedData.length) { + throw new IOException("The read data size does not match with the partition size."); + } + partitionsInRange.add(new SerializedPartition<>( + key, serializedData, serializedData.length)); + } else { + // Have to skip this partition. + skipBytes(fileStream, partitionmetadata.getPartitionSize()); + } + } + } + } catch (final IOException e) { + throw new BlockFetchException(e); + } + + return partitionsInRange; + } + } + + /** + * Skips some bytes in a input stream. + * + * @param inputStream the stream to skip. + * @param bytesToSkip the number of bytes to skip. + * @throws IOException if fail to skip. + */ + private void skipBytes(final InputStream inputStream, + final long bytesToSkip) throws IOException { + long remainingBytesToSkip = bytesToSkip; + while (remainingBytesToSkip > 0) { + final long skippedBytes = inputStream.skip(bytesToSkip); + remainingBytesToSkip -= skippedBytes; + if (skippedBytes <= 0) { + throw new IOException("The file stream failed to skip to the next block."); + } + } + } + + /** + * Retrieves the list of {@link FileArea}s for the specified {@link KeyRange}. + * + * @param keyRange the key range + * @return list of the file areas + * @throws IOException if failed to open a file channel + */ + public List asFileAreas(final KeyRange keyRange) throws IOException { + if (!metadata.isCommitted()) { + throw new IOException("Cannot retrieve elements before a block is committed"); + } else { + final List fileAreas = new ArrayList<>(); + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + if (keyRange.includes(partitionMetadata.getKey())) { + fileAreas.add(new FileArea(filePath, partitionMetadata.getOffset(), partitionMetadata.getPartitionSize())); + } + } + return fileAreas; + } + } + + /** + * Deletes the file that contains this block data. + * This method have to be called after all read is completed (or failed). + * + * @throws IOException if failed to delete. + */ + public void deleteFile() throws IOException { + metadata.deleteMetadata(); + try { + if(fs.lookup(filePath).get()!=null) + fs.delete(filePath, true); + }catch (IOException e){ + e.printStackTrace(); + } + catch (Exception e){ + LOG.info("HY: deleteFile failed"); + e.printStackTrace(); + } + } + + /** + * Commits this block to prevent further write. + * + * @return the size of each partition. + * @throws BlockWriteException for any error occurred while trying to write a block. + */ + @Override + public synchronized Optional> commit() throws BlockWriteException { + try { + if (!metadata.isCommitted()) { + commitPartitions(); + metadata.commitBlock(); + } + final List> partitionMetadataList = metadata.getPartitionMetadataList(); + final Map partitionSizes = new HashMap<>(partitionMetadataList.size()); + for (final PartitionMetadata partitionMetadata : partitionMetadataList) { + final K key = partitionMetadata.getKey(); + final long partitionSize = partitionMetadata.getPartitionSize(); + if (partitionSizes.containsKey(key)) { + partitionSizes.compute(key, + (existingKey, existingValue) -> existingValue + partitionSize); + } else { + partitionSizes.put(key, partitionSize); + } + } + return Optional.of(partitionSizes); + } catch (final IOException e) { + throw new BlockWriteException(e); + } + } + + /** + * Commits all un-committed partitions. + * The committed partitions will be flushed to the storage. + */ + @Override + public synchronized void commitPartitions() throws BlockWriteException { + final List> partitions = new ArrayList<>(); + try { + for (final Partition partition : nonCommittedPartitionsMap.values()) { + partition.commit(); + partitions.add((SerializedPartition) partition); + } + writeToFile(partitions); + nonCommittedPartitionsMap.clear(); + } catch (final IOException e) { + throw new BlockWriteException(e); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * @return the ID of this block. + */ + @Override + public String getId() { return id; } + + /** + * @return whether this block is committed or not. + */ + @Override + public boolean isCommitted() { + return metadata.isCommitted(); + } +} diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index b596230a98..5dc80b0259 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -25,6 +25,7 @@ import org.apache.nemo.common.exception.BlockWriteException; import org.apache.nemo.runtime.executor.data.*; import org.apache.nemo.runtime.executor.data.block.Block; +import org.apache.nemo.runtime.executor.data.block.CrailFileBlock; import org.apache.nemo.runtime.executor.data.metadata.CrailFileMetadata; import org.apache.nemo.runtime.executor.data.metadata.FileMetadata; import org.apache.nemo.runtime.executor.data.metadata.LocalFileMetadata; @@ -82,7 +83,7 @@ public Block createBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath); - return new FileBlock<>(blockId, serializer, filePath, metadata, fs); + return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); } /** @@ -94,9 +95,9 @@ public Block createBlock(final String blockId) { @Override public void writeBlock(final Block block) throws BlockWriteException { - if (!(block instanceof FileBlock)) { + if (!(block instanceof CrailFileBlock)) { throw new BlockWriteException(new Throwable( - this.toString() + " only accept " + FileBlock.class.getName())); + this.toString() + " only accept " + CrailFileBlock.class.getName())); } else if (!block.isCommitted()) { throw new BlockWriteException(new Throwable("The block " + block.getId() + "is not committed yet.")); } @@ -120,7 +121,7 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio return Optional.empty(); } else { try { - final FileBlock block = getBlockFromFile(blockId); + final CrailFileBlock block = getBlockFromFile(blockId); return Optional.of(block); } catch (final IOException e) { throw new BlockFetchException(e); @@ -147,7 +148,7 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ try { if (fs.lookup(filePath).get()!=null) { - final FileBlock block = getBlockFromFile(blockId); + final CrailFileBlock block = getBlockFromFile(blockId); block.deleteFile(); return true; } else { @@ -172,11 +173,11 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ * @return the {@link FileBlock} gotten. * @throws IOException if fail to get. */ - private FileBlock getBlockFromFile(final String blockId) throws Exception { + private CrailFileBlock getBlockFromFile(final String blockId) throws Exception { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final CrailFileMetadata metadata = CrailFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); - return new FileBlock<>(blockId, serializer, filePath, metadata, fs); + return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); } } From cca426f78d96641d32382ce8dbed36e713275532 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Feb 2019 13:59:50 +0900 Subject: [PATCH 082/235] FileBlock reverted --- .../executor/data/block/FileBlock.java | 143 ++++-------------- 1 file changed, 30 insertions(+), 113 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index df5621ed9f..a833100bfd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -18,13 +18,10 @@ */ package org.apache.nemo.runtime.executor.data.block; -import org.apache.crail.*; -import org.apache.crail.conf.CrailConfiguration; import org.apache.nemo.common.Pair; import org.apache.nemo.common.exception.BlockFetchException; import org.apache.nemo.common.exception.BlockWriteException; import org.apache.nemo.common.KeyRange; -import org.apache.nemo.common.ir.edge.executionproperty.DataStoreProperty; import org.apache.nemo.runtime.executor.data.*; import org.apache.nemo.runtime.executor.data.partition.NonSerializedPartition; import org.apache.nemo.runtime.executor.data.partition.Partition; @@ -37,7 +34,6 @@ import javax.annotation.concurrent.NotThreadSafe; import java.io.*; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; @@ -56,9 +52,6 @@ public final class FileBlock implements Block { private final Serializer serializer; private final String filePath; private final FileMetadata metadata; - //CrailConfiguration conf = null; - CrailStore fs = null; - CrailFile file = null; /** * Constructor. @@ -68,7 +61,6 @@ public final class FileBlock implements Block { * @param filePath the path of the file that this block will be stored. * @param metadata the metadata for this block. */ - public FileBlock(final String blockId, final Serializer serializer, final String filePath, @@ -80,40 +72,6 @@ public FileBlock(final String blockId, this.metadata = metadata; } - public FileBlock(final String blockId, - final Serializer serializer, - final String filePath, - final FileMetadata metadata, - CrailStore fs) { - this.id = blockId; - this.nonCommittedPartitionsMap = new HashMap<>(); - this.serializer = serializer; - this.filePath = filePath; - this.metadata = metadata; - if(filePath.contains("crail")) { - try { - LOG.info("HY: FileBlock entered"); - this.fs = fs; - this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - LOG.info("HY: crail file block created"); - } catch (Exception e1) { - LOG.info("HY: crail file block creation might have failed"); - //e1.printStackTrace(); - try{ - this.fs = fs; - this.file = fs.lookup(filePath).get().asFile(); - file.syncDir(); - LOG.info("HY: {} fetched", blockId); - } - catch(Exception e2){ - LOG.info("HY: {} fetch failed"); - // e2.printStackTrace(); - } - } - } - } - /** * Writes the serialized data of this block having a specific key value as a partition to the file * where this block resides. @@ -123,28 +81,16 @@ public FileBlock(final String blockId, * @throws IOException if fail to write. */ private void writeToFile(final Iterable> serializedPartitions) - throws Exception { - if (filePath.contains("crail")) { - LOG.info("HY: FileBlock writeToFile started"); - //Crail 디렉토리의 경우 미리 생성해놓은 CrailFile을 이용하여 write - final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); - for(final SerializedPartition serializedPartition : serializedPartitions){ + throws IOException { + try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { + for (final SerializedPartition serializedPartition : serializedPartitions) { + // Reserve a partition write and get the metadata. metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); - fileOutputStream.write(serializedPartition.getData()); - } - fileOutputStream.close(); - LOG.info("HY: FileBlock writeToFile ended"); - } - else { - try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { - for (final SerializedPartition serializedPartition : serializedPartitions) { - // Reserve a partition write and get the metadata. - metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); - fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); - } + fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); } } } + /** * Writes an element to non-committed block. * Invariant: This should not be invoked after this block is committed. @@ -182,13 +128,13 @@ public void write(final K key, */ @Override public void writePartitions(final Iterable> partitions) - throws BlockWriteException { + throws BlockWriteException { if (metadata.isCommitted()) { throw new BlockWriteException(new Throwable("The partition is already committed!")); } else { try { final Iterable> convertedPartitions = - DataUtil.convertToSerPartitions(serializer, partitions); + DataUtil.convertToSerPartitions(serializer, partitions); writeSerializedPartitions(convertedPartitions); } catch (final IOException e) { throw new BlockWriteException(e); @@ -205,7 +151,7 @@ public void writePartitions(final Iterable> partitions */ @Override public void writeSerializedPartitions(final Iterable> partitions) - throws BlockWriteException { + throws BlockWriteException { if (metadata.isCommitted()) { throw new BlockWriteException(new Throwable("The partition is already committed!")); } else { @@ -213,8 +159,6 @@ public void writeSerializedPartitions(final Iterable> par writeToFile(partitions); } catch (final IOException e) { throw new BlockWriteException(e); - } catch (Exception e) { - e.printStackTrace(); } } } @@ -235,45 +179,25 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - if (filePath.contains("crail")) { - try{ - final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0); - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { - final K key = partitionMetadata.getKey(); - if (keyRange.includes(key)) { - // The key value of this partition is in the range. - final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - } else { - // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); - } - } - }catch(Exception e){ - e.printStackTrace(); - } - } else { - try (final FileInputStream fileStream = new FileInputStream(filePath)) { - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { - final K key = partitionMetadata.getKey(); - if (keyRange.includes(key)) { - // The key value of this partition is in the range. - final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - } else { - // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); - } + try (final FileInputStream fileStream = new FileInputStream(filePath)) { + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + final K key = partitionMetadata.getKey(); + if (keyRange.includes(key)) { + // The key value of this partition is in the range. + final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + } else { + // Have to skip this partition. + skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } } for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { final NonSerializedPartition deserializePartition = - DataUtil.deserializePartition( - partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), - new ByteArrayInputStream(partitionKeyBytes.right())); + DataUtil.deserializePartition( + partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), + new ByteArrayInputStream(partitionKeyBytes.right())); deserializedPartitions.add(deserializePartition); } } catch (final IOException e) { @@ -311,7 +235,7 @@ public Iterable> readSerializedPartitions(final KeyRange throw new IOException("The read data size does not match with the partition size."); } partitionsInRange.add(new SerializedPartition<>( - key, serializedData, serializedData.length)); + key, serializedData, serializedData.length)); } else { // Have to skip this partition. skipBytes(fileStream, partitionmetadata.getPartitionSize()); @@ -374,15 +298,8 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { */ public void deleteFile() throws IOException { metadata.deleteMetadata(); - try { - if(fs.lookup(filePath).get()!=null) - fs.delete(filePath, true); - }catch (IOException e){ - e.printStackTrace(); - } - catch (Exception e){ - LOG.info("HY: deleteFile failed"); - e.printStackTrace(); + if (new File(filePath).exists()) { + Files.delete(Paths.get(filePath)); } } @@ -406,7 +323,7 @@ public synchronized Optional> commit() throws BlockWriteException { final long partitionSize = partitionMetadata.getPartitionSize(); if (partitionSizes.containsKey(key)) { partitionSizes.compute(key, - (existingKey, existingValue) -> existingValue + partitionSize); + (existingKey, existingValue) -> existingValue + partitionSize); } else { partitionSizes.put(key, partitionSize); } @@ -433,8 +350,6 @@ public synchronized void commitPartitions() throws BlockWriteException { nonCommittedPartitionsMap.clear(); } catch (final IOException e) { throw new BlockWriteException(e); - } catch (Exception e) { - e.printStackTrace(); } } @@ -442,7 +357,9 @@ public synchronized void commitPartitions() throws BlockWriteException { * @return the ID of this block. */ @Override - public String getId() { return id; } + public String getId() { + return id; + } /** * @return whether this block is committed or not. From cb636b6fe00d4abe3e8850a1f4f88dbe8f383aeb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Feb 2019 14:24:30 +0900 Subject: [PATCH 083/235] CrailFileBlock edit --- .../runtime/executor/data/BlockManagerWorker.java | 11 +++++++++-- .../runtime/executor/data/block/CrailFileBlock.java | 6 ++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 2ae4fc8eb0..5d5f3a30a1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -36,6 +36,7 @@ import org.apache.nemo.runtime.executor.bytetransfer.ByteOutputContext; import org.apache.nemo.runtime.executor.bytetransfer.ByteTransfer; import org.apache.nemo.runtime.executor.data.block.Block; +import org.apache.nemo.runtime.executor.data.block.CrailFileBlock; import org.apache.nemo.runtime.executor.data.block.FileBlock; import org.apache.nemo.runtime.executor.data.partition.NonSerializedPartition; import org.apache.nemo.runtime.executor.data.partition.SerializedPartition; @@ -341,14 +342,20 @@ public void run() { final Optional optionalBlock = getBlockStore(blockStore).readBlock(blockId); if (optionalBlock.isPresent()) { if (DataStoreProperty.Value.LocalFileStore.equals(blockStore) - || DataStoreProperty.Value.GlusterFileStore.equals(blockStore) - || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { + || DataStoreProperty.Value.GlusterFileStore.equals(blockStore)) { final List fileAreas = ((FileBlock) optionalBlock.get()).asFileAreas(keyRange); for (final FileArea fileArea : fileAreas) { try (ByteOutputContext.ByteOutputStream os = outputContext.newOutputStream()) { os.writeFileArea(fileArea); } } + } else if(DataStoreProperty.Value.CrailFileStore.equals(blockStore)){ + final List fileAreas = ((CrailFileBlock) optionalBlock.get()).asFileAreas(keyRange); + for (final FileArea fileArea : fileAreas) { + try (ByteOutputContext.ByteOutputStream os = outputContext.newOutputStream()){ + os.writeFileArea(fileArea); + } + } } else { final Iterable partitions = optionalBlock.get().readSerializedPartitions(keyRange); for (final SerializedPartition partition : partitions) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index b94a1ad953..ed0c756f25 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -45,7 +45,7 @@ * @param the key type of its partitions. */ @NotThreadSafe -public final class CrailFileBlock implements Block { +public final class CrailFileBlock implements Block{ private static final Logger LOG = LoggerFactory.getLogger(CrailFileBlock.class.getName()); private final String id; private final Map> nonCommittedPartitionsMap; @@ -249,7 +249,7 @@ public Iterable> readSerializedPartitions(final KeyRange // Deserialize the data final List> partitionsInRange = new ArrayList<>(); try { - try (final FileInputStream fileStream = new FileInputStream(filePath)) { + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { @@ -269,6 +269,8 @@ public Iterable> readSerializedPartitions(final KeyRange } } catch (final IOException e) { throw new BlockFetchException(e); + } catch (final Exception e2){ + e2.printStackTrace(); } return partitionsInRange; From 77761b7135183703e8122fcc15fcde9bfc36c9e9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Feb 2019 16:22:40 +0900 Subject: [PATCH 084/235] metadatafile path edit revert --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 36182b8d5c..80e83df4e2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -177,8 +177,7 @@ public static String blockIdToFilePath(final String blockId, */ public static String blockIdToMetaFilePath(final String blockId, final String fileDirectory) { - //return fileDirectory + "/" + blockId + "_meta"; - return "../crail_meta_dir" + blockId; + return fileDirectory + "/" + blockId + "_meta"; } /** From 85b0418121362437ec282f10d7eb0426479d3679 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Feb 2019 16:30:32 +0900 Subject: [PATCH 085/235] metadata revert --- .../runtime/executor/data/metadata/CrailFileMetadata.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 5cdcafcc15..6e8ce78546 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -55,7 +55,6 @@ public final class CrailFileMetadata extends FileMetadat private CrailFileMetadata(final String metaFilePath) { super(); this.metaFilePath = metaFilePath; - /* try { conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); @@ -72,7 +71,6 @@ private CrailFileMetadata(final String metaFilePath) { LOG.info("HY: CrailConfiguration failed"); e.printStackTrace(); } - */ } /** @@ -85,7 +83,6 @@ private CrailFileMetadata(final String metaFilePath, final List> partitionMetadataList) { super(partitionMetadataList); this.metaFilePath = metaFilePath; - /* try { try { conf = new CrailConfiguration(); @@ -101,7 +98,6 @@ private CrailFileMetadata(final String metaFilePath, LOG.info("HY: CrailConfiguration failed"); e.printStackTrace(); } - */ } /** From b0642ac06699eb0f921df3880f910612319edf25 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 26 Feb 2019 13:15:12 +0900 Subject: [PATCH 086/235] logging --- .../java/org/apache/nemo/runtime/executor/task/TaskExecutor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java index af31979999..56f4ab5059 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java @@ -299,6 +299,7 @@ irVertex, outputCollector, new TransformContextImpl(broadcastManagerWorker), * Process a data element down the DAG dependency. */ private void processElement(final OutputCollector outputCollector, final Object dataElement) { + LOG.info("HY: dataElement {}",dataElement); outputCollector.emit(dataElement); } From 5aabfe7f56f13827a44739e5f031564a1404595a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 26 Feb 2019 13:28:41 +0900 Subject: [PATCH 087/235] logging revert --- .../java/org/apache/nemo/runtime/executor/task/TaskExecutor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java index 56f4ab5059..af31979999 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/task/TaskExecutor.java @@ -299,7 +299,6 @@ irVertex, outputCollector, new TransformContextImpl(broadcastManagerWorker), * Process a data element down the DAG dependency. */ private void processElement(final OutputCollector outputCollector, final Object dataElement) { - LOG.info("HY: dataElement {}",dataElement); outputCollector.emit(dataElement); } From a4088855e09ba7a0e6c81e077e48808e84835db2 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 26 Feb 2019 16:28:29 +0900 Subject: [PATCH 088/235] debugging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ed0c756f25..f3bf49a6f6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -202,7 +202,7 @@ public Iterable> readPartitions(final KeyRange keyRang try { final List> partitionKeyBytesPairs = new ArrayList<>(); try{ - final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0); + final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { @@ -249,7 +249,7 @@ public Iterable> readSerializedPartitions(final KeyRange // Deserialize the data final List> partitionsInRange = new ArrayList<>(); try { - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 5dc80b0259..bd3f150f22 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -56,7 +56,6 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFi private final String fileDirectory; private CrailConfiguration conf = null; private CrailStore fs = null; - //CrailFile file = null; /** * Constructor. @@ -72,7 +71,6 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - //int host = fs.getLocationClass().value(); this.fileDirectory = volumeDirectory + "/files"; } From d78979fb6b4662976ea885bc4ec33a34cfba087a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 27 Feb 2019 12:48:12 +0900 Subject: [PATCH 089/235] metadata fix --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 6e8ce78546..84fabd3435 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -158,7 +158,6 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); - CrailFile file; try { file = fs.lookup(metaFilePath).get().asFile(); file.syncDir(); From 42e62c7507523139847e9a65432a21182c3a3b71 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 27 Feb 2019 13:08:00 +0900 Subject: [PATCH 090/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f3bf49a6f6..dae7eb05d2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -349,6 +349,7 @@ public synchronized Optional> commit() throws BlockWriteException { if (!metadata.isCommitted()) { commitPartitions(); metadata.commitBlock(); + LOG.info("HY: block and metadata commit for {}", id); } final List> partitionMetadataList = metadata.getPartitionMetadataList(); final Map partitionSizes = new HashMap<>(partitionMetadataList.size()); From f8872cbc3d8e74d2dfef5bedf4f024ced4508f8c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 27 Feb 2019 13:31:36 +0900 Subject: [PATCH 091/235] logging --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 84fabd3435..70a102bf1a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -119,6 +119,7 @@ public void deleteMetadata() throws IOException { */ @Override public synchronized void commitBlock() throws IOException { + LOG.info("HY: metadata commit for block {}", metaFilePath); final Iterable> partitionMetadataItr = getPartitionMetadataList(); try{ CrailBufferedOutputStream metaFileOutputstream =file.getBufferedOutputStream(0); @@ -133,6 +134,7 @@ public synchronized void commitBlock() throws IOException { } catch(Exception e){ LOG.info("HY: CrailBufferedOutputStream exception occurred"); + e.printStackTrace(); } setCommitted(true); } From 03d055254fb9437574f09b2f38afbf0dd5012a49 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 27 Feb 2019 14:35:18 +0900 Subject: [PATCH 092/235] metadata logic testing + logging --- .../data/metadata/CrailFileMetadata.java | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 70a102bf1a..127e2837f8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -55,22 +55,22 @@ public final class CrailFileMetadata extends FileMetadat private CrailFileMetadata(final String metaFilePath) { super(); this.metaFilePath = metaFilePath; - try { - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); - try { - this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - } catch (Exception e){ - //when it already exists - this.file = fs.lookup(metaFilePath).get().asFile(); - file.syncDir(); - } - } - catch(Exception e){ - LOG.info("HY: CrailConfiguration failed"); - e.printStackTrace(); - } +// try { +// conf = new CrailConfiguration(); +// fs = CrailStore.newInstance(conf); +// try { +// this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); +// file.syncDir(); +// } catch (Exception e){ +// //when it already exists +// this.file = fs.lookup(metaFilePath).get().asFile(); +// file.syncDir(); +// } +// } +// catch(Exception e){ +// LOG.info("HY: CrailConfiguration failed"); +// e.printStackTrace(); +// } } /** @@ -83,21 +83,21 @@ private CrailFileMetadata(final String metaFilePath, final List> partitionMetadataList) { super(partitionMetadataList); this.metaFilePath = metaFilePath; - try { - try { - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); - this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - } catch (Exception e) { - //when it already exists - this.file = fs.lookup(metaFilePath).get().asFile(); - file.syncDir(); - } - } catch(Exception e){ - LOG.info("HY: CrailConfiguration failed"); - e.printStackTrace(); - } +// try { +// try { +// conf = new CrailConfiguration(); +// fs = CrailStore.newInstance(conf); +// this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); +// file.syncDir(); +// } catch (Exception e) { +// //when it already exists +// this.file = fs.lookup(metaFilePath).get().asFile(); +// file.syncDir(); +// } +// } catch(Exception e){ +// LOG.info("HY: CrailConfiguration failed"); +// e.printStackTrace(); +// } } /** @@ -122,6 +122,11 @@ public synchronized void commitBlock() throws IOException { LOG.info("HY: metadata commit for block {}", metaFilePath); final Iterable> partitionMetadataItr = getPartitionMetadataList(); try{ + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + LOG.info("HY: metadata Crail file getting complete"); CrailBufferedOutputStream metaFileOutputstream =file.getBufferedOutputStream(0); for (PartitionMetadata partitionMetadata : partitionMetadataItr) { final byte[] key = SerializationUtils.serialize(partitionMetadata.getKey()); From 46617ed2facb159bef42338753c4348bd766a6e9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 27 Feb 2019 14:41:25 +0900 Subject: [PATCH 093/235] metadata logging --- .../executor/data/metadata/CrailFileMetadata.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 127e2837f8..b8448be092 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -46,7 +46,7 @@ public final class CrailFileMetadata extends FileMetadat private final String metaFilePath; private static CrailConfiguration conf; private static CrailStore fs; - private static CrailFile file; + private static CrailFile file=null; /** * Constructor for creating a non-committed new file metadata. * @@ -55,6 +55,12 @@ public final class CrailFileMetadata extends FileMetadat private CrailFileMetadata(final String metaFilePath) { super(); this.metaFilePath = metaFilePath; + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + }catch(Exception e){ + LOG.info("HY: CrailConfiguration failed"); + } // try { // conf = new CrailConfiguration(); // fs = CrailStore.newInstance(conf); @@ -83,6 +89,12 @@ private CrailFileMetadata(final String metaFilePath, final List> partitionMetadataList) { super(partitionMetadataList); this.metaFilePath = metaFilePath; + try { + conf = new CrailConfiguration(); + fs = CrailStore.newInstance(conf); + }catch(Exception e){ + LOG.info("HY: CrailConfiguration failed"); + } // try { // try { // conf = new CrailConfiguration(); From 27e79f0e9c917101e713694ea0af93641f3ddbe4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 28 Feb 2019 10:26:35 +0900 Subject: [PATCH 094/235] refactoring --- .../data/metadata/CrailFileMetadata.java | 49 ++----------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index b8448be092..ab1f018962 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -61,22 +61,6 @@ private CrailFileMetadata(final String metaFilePath) { }catch(Exception e){ LOG.info("HY: CrailConfiguration failed"); } -// try { -// conf = new CrailConfiguration(); -// fs = CrailStore.newInstance(conf); -// try { -// this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); -// file.syncDir(); -// } catch (Exception e){ -// //when it already exists -// this.file = fs.lookup(metaFilePath).get().asFile(); -// file.syncDir(); -// } -// } -// catch(Exception e){ -// LOG.info("HY: CrailConfiguration failed"); -// e.printStackTrace(); -// } } /** @@ -95,21 +79,6 @@ private CrailFileMetadata(final String metaFilePath, }catch(Exception e){ LOG.info("HY: CrailConfiguration failed"); } -// try { -// try { -// conf = new CrailConfiguration(); -// fs = CrailStore.newInstance(conf); -// this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); -// file.syncDir(); -// } catch (Exception e) { -// //when it already exists -// this.file = fs.lookup(metaFilePath).get().asFile(); -// file.syncDir(); -// } -// } catch(Exception e){ -// LOG.info("HY: CrailConfiguration failed"); -// e.printStackTrace(); -// } } /** @@ -136,10 +105,7 @@ public synchronized void commitBlock() throws IOException { try{ conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); - this.file = fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); - file.syncDir(); - LOG.info("HY: metadata Crail file getting complete"); - CrailBufferedOutputStream metaFileOutputstream =file.getBufferedOutputStream(0); + CrailBufferedOutputStream metaFileOutputstream =fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile().getBufferedOutputStream(0); for (PartitionMetadata partitionMetadata : partitionMetadataItr) { final byte[] key = SerializationUtils.serialize(partitionMetadata.getKey()); metaFileOutputstream.writeInt(key.length); @@ -177,16 +143,9 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); - try { - file = fs.lookup(metaFilePath).get().asFile(); - file.syncDir(); - } catch (Exception e) { - throw new IOException("HY: File "+metaFilePath+ " does not exist!"); - } final List> partitionMetadataList = new ArrayList<>(); - try ( - final CrailBufferedInputStream dataInputStream = file.getBufferedInputStream(0); - ) { + try { + CrailBufferedInputStream dataInputStream = fs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); while (dataInputStream.available() > 0) { final int keyLength = dataInputStream.readInt(); final byte[] desKey = new byte[keyLength]; @@ -201,6 +160,8 @@ public static CrailFileMetadata open(final String me ); partitionMetadataList.add(partitionMetadata); } + } catch (Exception e) { + throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); } From 96e74b86818ef0321e3704c498363d768f26e5dd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:19:59 +0900 Subject: [PATCH 095/235] inputstream close debugging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index dae7eb05d2..75ceb10b93 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -215,6 +215,7 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } + fileStream.close(); }catch(Exception e){ e.printStackTrace(); } @@ -260,7 +261,7 @@ public Iterable> readSerializedPartitions(final KeyRange throw new IOException("The read data size does not match with the partition size."); } partitionsInRange.add(new SerializedPartition<>( - key, serializedData, serializedData.length)); + key, serializedData, serializedData.length)); } else { // Have to skip this partition. skipBytes(fileStream, partitionmetadata.getPartitionSize()); From 7e57edc97b4884c49a3b3ae3df5bd6e29e92b786 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:40:50 +0900 Subject: [PATCH 096/235] inputstream skip debugging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 75ceb10b93..f68e7a2cd6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -201,8 +201,7 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try{ - final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680); + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { @@ -215,7 +214,7 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - fileStream.close(); + fileStream.skip(0); }catch(Exception e){ e.printStackTrace(); } @@ -267,7 +266,9 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } + fileStream.skip(0); } + } catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e2){ From cd547848fb261b0493ae2d751bd0f97bdd35e4c6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:47:12 +0900 Subject: [PATCH 097/235] inputstream skip debugging revert --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f68e7a2cd6..2a30d67bc7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,7 +214,6 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - fileStream.skip(0); }catch(Exception e){ e.printStackTrace(); } @@ -266,7 +265,6 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - fileStream.skip(0); } } catch (final IOException e) { From c2be06d4a6ae291d834e8c7932252d0b8942af44 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:49:17 +0900 Subject: [PATCH 098/235] inputstream skip debugging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 ++ .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 + 2 files changed, 3 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 2a30d67bc7..f68e7a2cd6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,6 +214,7 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } + fileStream.skip(0); }catch(Exception e){ e.printStackTrace(); } @@ -265,6 +266,7 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } + fileStream.skip(0); } } catch (final IOException e) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index ab1f018962..7254880245 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -160,6 +160,7 @@ public static CrailFileMetadata open(final String me ); partitionMetadataList.add(partitionMetadata); } + dataInputStream.skip(0); } catch (Exception e) { throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } From a3d4f802f2d472540072e26733f4bf9ca4051f0b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:54:21 +0900 Subject: [PATCH 099/235] inputstream skip debugging revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 +--- .../runtime/executor/data/metadata/CrailFileMetadata.java | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f68e7a2cd6..34d16337e7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,7 +214,7 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - fileStream.skip(0); + fileStream.close(); }catch(Exception e){ e.printStackTrace(); } @@ -266,9 +266,7 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - fileStream.skip(0); } - } catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e2){ diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 7254880245..ab1f018962 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -160,7 +160,6 @@ public static CrailFileMetadata open(final String me ); partitionMetadataList.add(partitionMetadata); } - dataInputStream.skip(0); } catch (Exception e) { throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } From cc1b147fe3e306b7075baf4132f3c8e448e4e403 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 10:57:37 +0900 Subject: [PATCH 100/235] metadata logging --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index ab1f018962..d8bfa70365 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -147,6 +147,7 @@ public static CrailFileMetadata open(final String me try { CrailBufferedInputStream dataInputStream = fs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); while (dataInputStream.available() > 0) { + LOG.info("HY: metadata available"); final int keyLength = dataInputStream.readInt(); final byte[] desKey = new byte[keyLength]; if (keyLength != dataInputStream.read(desKey)) { From e62fbc7cfe16641fb75e322be744d54b7cc37860 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 11:14:14 +0900 Subject: [PATCH 101/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 34d16337e7..482a4c742c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,6 +204,7 @@ public Iterable> readPartitions(final KeyRange keyRang try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); + LOG.info("HY: key fetched this time: {}", key.toString()); if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; @@ -214,7 +215,6 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - fileStream.close(); }catch(Exception e){ e.printStackTrace(); } From 829eb576fb9651b2a733e6c6ae510116ec0e44a6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 11:27:53 +0900 Subject: [PATCH 102/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 482a4c742c..ea1a84242a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,7 +204,7 @@ public Iterable> readPartitions(final KeyRange keyRang try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); - LOG.info("HY: key fetched this time: {}", key.toString()); + LOG.info("HY: key Range: {}", keyRange.toString()); if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; From e93871f9369022a707aeaf04bec0d0a84b46de8b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 11:40:39 +0900 Subject: [PATCH 103/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ea1a84242a..5900d955d0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,12 +204,13 @@ public Iterable> readPartitions(final KeyRange keyRang try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); - LOG.info("HY: key Range: {}", keyRange.toString()); + //HY: key fetch and range okay if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + LOG.info("HY: key bytes pairs from file: {}",partitionKeyBytesPairs.toString()); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); From ab84f1a008d97db903d25680d45b71229db47035 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 11:51:44 +0900 Subject: [PATCH 104/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 +-- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 5900d955d0..71960af69d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,13 +204,11 @@ public Iterable> readPartitions(final KeyRange keyRang try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); - //HY: key fetch and range okay if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - LOG.info("HY: key bytes pairs from file: {}",partitionKeyBytesPairs.toString()); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); @@ -224,6 +222,7 @@ public Iterable> readPartitions(final KeyRange keyRang DataUtil.deserializePartition( partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), new ByteArrayInputStream(partitionKeyBytes.right())); + LOG.info("HY: deserializedPartition {}", deserializedPartitions.toString()); deserializedPartitions.add(deserializePartition); } } catch (final IOException e) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index d8bfa70365..ab1f018962 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -147,7 +147,6 @@ public static CrailFileMetadata open(final String me try { CrailBufferedInputStream dataInputStream = fs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); while (dataInputStream.available() > 0) { - LOG.info("HY: metadata available"); final int keyLength = dataInputStream.readInt(); final byte[] desKey = new byte[keyLength]; if (keyLength != dataInputStream.read(desKey)) { From c4606f1ae29e436e212154b393bdd00b6db0a534 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 11:59:26 +0900 Subject: [PATCH 105/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 71960af69d..c41a3a3001 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -222,7 +222,7 @@ public Iterable> readPartitions(final KeyRange keyRang DataUtil.deserializePartition( partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), new ByteArrayInputStream(partitionKeyBytes.right())); - LOG.info("HY: deserializedPartition {}", deserializedPartitions.toString()); + LOG.info("HY: deserializedPartition " + deserializedPartitions.toArray()); deserializedPartitions.add(deserializePartition); } } catch (final IOException e) { From 61561c1fe949a858b9db0cfc266d96f3ca23cb23 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 12:08:11 +0900 Subject: [PATCH 106/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index c41a3a3001..da8207e4a2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -213,6 +213,7 @@ public Iterable> readPartitions(final KeyRange keyRang // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); } + LOG.info("HY: partition size: {}",partitionKeyBytesPairs.size()); } }catch(Exception e){ e.printStackTrace(); @@ -222,7 +223,6 @@ public Iterable> readPartitions(final KeyRange keyRang DataUtil.deserializePartition( partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), new ByteArrayInputStream(partitionKeyBytes.right())); - LOG.info("HY: deserializedPartition " + deserializedPartitions.toArray()); deserializedPartitions.add(deserializePartition); } } catch (final IOException e) { From 7d006d8fef4d86186535ed315cd46d30dd1224cd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 12:12:54 +0900 Subject: [PATCH 107/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index da8207e4a2..caec47b7a6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -213,9 +213,9 @@ public Iterable> readPartitions(final KeyRange keyRang // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); } - LOG.info("HY: partition size: {}",partitionKeyBytesPairs.size()); } - }catch(Exception e){ + LOG.info("HY: partition size: {}",partitionKeyBytesPairs.size()); + }catch(Exception e){ e.printStackTrace(); } for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { From eac61e1c36f45b3868331332097a6e5fcf62686c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 12:21:38 +0900 Subject: [PATCH 108/235] logging --- .../apache/nemo/runtime/executor/data/BlockManagerWorker.java | 2 +- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 5d5f3a30a1..d35402c84b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -424,7 +424,7 @@ private CompletableFuture getDataFromLocalBlock( numSerializedBytes += partition.getNumSerializedBytes(); numEncodedBytes += partition.getNumEncodedBytes(); } - + LOG.info("HY: numSerializedBytes {}, numEncodedBytes {}", numSerializedBytes, numEncodedBytes); return CompletableFuture.completedFuture(DataUtil.IteratorWithNumBytes.of(innerIterator, numSerializedBytes, numEncodedBytes)); } catch (final DataUtil.IteratorWithNumBytes.NumBytesNotSupportedException e) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index caec47b7a6..5327dc1768 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,7 +214,6 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - LOG.info("HY: partition size: {}",partitionKeyBytesPairs.size()); }catch(Exception e){ e.printStackTrace(); } From 8ea5cf50ee56d298f3a2933322291ffbf748a997 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 12:32:37 +0900 Subject: [PATCH 109/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 5327dc1768..bf00fffc89 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -224,6 +224,7 @@ public Iterable> readPartitions(final KeyRange keyRang new ByteArrayInputStream(partitionKeyBytes.right())); deserializedPartitions.add(deserializePartition); } + LOG.info("HY: deserializedPartitions size: {}",deserializedPartitions.size()); } catch (final IOException e) { throw new BlockFetchException(e); } From fb7b74eceeb5e324100b9f1347d354974ea77b12 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 13:38:28 +0900 Subject: [PATCH 110/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index bf00fffc89..2dc35477b2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,7 +207,9 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + int test; + test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + LOG.info("HY: test value {}",test); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 29a35a6e2c28ab4ff8b38c18394075ec8be71dd4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 13:50:51 +0900 Subject: [PATCH 111/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 2dc35477b2..da07dff7b7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -210,6 +210,7 @@ public Iterable> readPartitions(final KeyRange keyRang int test; test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: test value {}",test); + LOG.info("HY: partition length: {}", partitionBytes.length); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 93efe7b2d517776bbb9a4cc196bb0c413c018d57 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 14:08:11 +0900 Subject: [PATCH 112/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index da07dff7b7..f408f9aab2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -210,7 +210,7 @@ public Iterable> readPartitions(final KeyRange keyRang int test; test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: test value {}",test); - LOG.info("HY: partition length: {}", partitionBytes.length); + LOG.info("HY: partition length: {}", partitionBytes.length); //checked partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. @@ -225,6 +225,7 @@ public Iterable> readPartitions(final KeyRange keyRang DataUtil.deserializePartition( partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), new ByteArrayInputStream(partitionKeyBytes.right())); + LOG.info("HY: deserializePartition {}", deserializePartition.getData().iterator().next()); deserializedPartitions.add(deserializePartition); } LOG.info("HY: deserializedPartitions size: {}",deserializedPartitions.size()); From ed2a348da60ea70414abc34a36009e858721ba65 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sat, 2 Mar 2019 14:12:47 +0900 Subject: [PATCH 113/235] logging revert --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f408f9aab2..75cf029699 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -225,7 +225,6 @@ public Iterable> readPartitions(final KeyRange keyRang DataUtil.deserializePartition( partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), new ByteArrayInputStream(partitionKeyBytes.right())); - LOG.info("HY: deserializePartition {}", deserializePartition.getData().iterator().next()); deserializedPartitions.add(deserializePartition); } LOG.info("HY: deserializedPartitions size: {}",deserializedPartitions.size()); From 3133ee87f1a5f30388b48ec89a8dfa9fcab1ec8a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 16:46:27 +0900 Subject: [PATCH 114/235] logging edit --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 75cf029699..56af53fec7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -82,7 +82,6 @@ public CrailFileBlock(final String blockId, file.syncDir(); LOG.info("HY: crail file block created"); } catch (Exception e1) { - LOG.info("HY: crail file block creation might have failed"); try{ this.fs = fs; this.file = fs.lookup(filePath).get().asFile(); From 8edcb4dbb22debb0708feff3c770feb635d5f08d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 19:29:11 +0900 Subject: [PATCH 115/235] skip --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 56af53fec7..a1696b2f59 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -268,6 +268,7 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } + fileStream.skip(0); } } catch (final IOException e) { throw new BlockFetchException(e); From f66b0e5fd04e1f9e1b6b5d4f90caefb6fcfbc0dc Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 19:39:50 +0900 Subject: [PATCH 116/235] skip revert --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index a1696b2f59..56af53fec7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -268,7 +268,6 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - fileStream.skip(0); } } catch (final IOException e) { throw new BlockFetchException(e); From 63781b5fd85e8134573c6184f68017b7e8638eec Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 19:49:51 +0900 Subject: [PATCH 117/235] seek!!!!! --- .../runtime/executor/data/block/CrailFileBlock.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 56af53fec7..c187fdcaad 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -200,7 +200,7 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)){ + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { @@ -216,6 +216,7 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } + fileStream.seek(0); }catch(Exception e){ e.printStackTrace(); } @@ -250,8 +251,7 @@ public Iterable> readSerializedPartitions(final KeyRange } else { // Deserialize the data final List> partitionsInRange = new ArrayList<>(); - try { - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(16807680)) { + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { @@ -268,8 +268,8 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - } - } catch (final IOException e) { + fileStream.seek(0); + }catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e2){ e2.printStackTrace(); From 2ed63db48b05f118084c43c56bb4840ce46994b9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 20:01:41 +0900 Subject: [PATCH 118/235] seek!!!!! --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index c187fdcaad..d128145715 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -201,7 +201,8 @@ public Iterable> readPartitions(final KeyRange keyRang try { final List> partitionKeyBytesPairs = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { + fileStream.seek(0); + for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { // The key value of this partition is in the range. @@ -216,7 +217,6 @@ public Iterable> readPartitions(final KeyRange keyRang skipBytes(fileStream, partitionMetadata.getPartitionSize()); } } - fileStream.seek(0); }catch(Exception e){ e.printStackTrace(); } @@ -253,6 +253,7 @@ public Iterable> readSerializedPartitions(final KeyRange final List> partitionsInRange = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { + fileStream.seek(0); final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { // The hash value of this partition is in the range. @@ -268,7 +269,6 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - fileStream.seek(0); }catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e2){ From d40eb476a88a6db1e444166edb73c171a72b45ed Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 20:20:49 +0900 Subject: [PATCH 119/235] logging --- .../runtime/executor/data/block/CrailFileBlock.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index d128145715..f796ca2208 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -228,6 +228,17 @@ public Iterable> readPartitions(final KeyRange keyRang deserializedPartitions.add(deserializePartition); } LOG.info("HY: deserializedPartitions size: {}",deserializedPartitions.size()); + long numSerializedBytes=0; + long numEncodedBytes=0; + for (final NonSerializedPartition partition : deserializedPartitions) { + try { + numSerializedBytes += partition.getNumSerializedBytes(); + numEncodedBytes += partition.getNumEncodedBytes(); + }catch(Exception e1){ + e1.printStackTrace(); + } + } + LOG.info("HY: numSerializedBytes: {}, numEncodedBytes: {}", numSerializedBytes, numEncodedBytes); } catch (final IOException e) { throw new BlockFetchException(e); } From ec45fbe3d354c0d04446b85d5b5888a2e3447c1f Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 20:48:48 +0900 Subject: [PATCH 120/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f796ca2208..740d17af81 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -210,8 +210,11 @@ public Iterable> readPartitions(final KeyRange keyRang int test; test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: test value {}",test); - LOG.info("HY: partition length: {}", partitionBytes.length); //checked + //LOG.info("HY: partition length: {}", partitionBytes.length); //no use. size is fixed when created partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + for(int i=0;i<10;i++){ + LOG.info("HY: partitionBytes[",i,"] : {}", partitionBytes[i]); + } } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); From 20d77a8baba3ca5eed6c912e133f49983075559b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 20:53:26 +0900 Subject: [PATCH 121/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 740d17af81..47327c07f7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -213,7 +213,7 @@ public Iterable> readPartitions(final KeyRange keyRang //LOG.info("HY: partition length: {}", partitionBytes.length); //no use. size is fixed when created partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); for(int i=0;i<10;i++){ - LOG.info("HY: partitionBytes[",i,"] : {}", partitionBytes[i]); + LOG.info("HY: partitionBytes : {}", partitionBytes[i]); } } else { // Have to skip this partition. From c3327cfb0f91368add6ccff965d385e8887dc6a0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 21:04:52 +0900 Subject: [PATCH 122/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 47327c07f7..e218be53ff 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,13 +207,16 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + for(int i=0;i<10;i++){ + LOG.info("HY: partitionBytes before: {}", partitionBytes[i]); + } int test; test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: test value {}",test); //LOG.info("HY: partition length: {}", partitionBytes.length); //no use. size is fixed when created partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); for(int i=0;i<10;i++){ - LOG.info("HY: partitionBytes : {}", partitionBytes[i]); + LOG.info("HY: partitionBytes after: {}", partitionBytes[i]); } } else { // Have to skip this partition. From c4a23c29773e1c4d25d66fee0e85828ee68092d3 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 4 Mar 2019 21:23:46 +0900 Subject: [PATCH 123/235] logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index e218be53ff..9c51438d50 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,17 +207,11 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - for(int i=0;i<10;i++){ - LOG.info("HY: partitionBytes before: {}", partitionBytes[i]); - } int test; test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: test value {}",test); //LOG.info("HY: partition length: {}", partitionBytes.length); //no use. size is fixed when created partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - for(int i=0;i<10;i++){ - LOG.info("HY: partitionBytes after: {}", partitionBytes[i]); - } } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); @@ -269,8 +263,8 @@ public Iterable> readSerializedPartitions(final KeyRange // Deserialize the data final List> partitionsInRange = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { + fileStream.seek(0); for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { - fileStream.seek(0); final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { // The hash value of this partition is in the range. From bf045958e785623ee0d9a477203f6315c4981a95 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 6 Mar 2019 11:18:35 +0900 Subject: [PATCH 124/235] logging --- .../executor/data/block/CrailFileBlock.java | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 9c51438d50..932a338470 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,13 +204,13 @@ public Iterable> readPartitions(final KeyRange keyRang fileStream.seek(0); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); + LOG.info("HY: metadata: {}", partitionMetadata.toString()); + LOG.info("HY: keyrange: {}", keyRange.toString()); + LOG.info("HY: key: {}", key.toString()); if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - int test; - test = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - LOG.info("HY: test value {}",test); - //LOG.info("HY: partition length: {}", partitionBytes.length); //no use. size is fixed when created + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. @@ -227,18 +227,6 @@ public Iterable> readPartitions(final KeyRange keyRang new ByteArrayInputStream(partitionKeyBytes.right())); deserializedPartitions.add(deserializePartition); } - LOG.info("HY: deserializedPartitions size: {}",deserializedPartitions.size()); - long numSerializedBytes=0; - long numEncodedBytes=0; - for (final NonSerializedPartition partition : deserializedPartitions) { - try { - numSerializedBytes += partition.getNumSerializedBytes(); - numEncodedBytes += partition.getNumEncodedBytes(); - }catch(Exception e1){ - e1.printStackTrace(); - } - } - LOG.info("HY: numSerializedBytes: {}, numEncodedBytes: {}", numSerializedBytes, numEncodedBytes); } catch (final IOException e) { throw new BlockFetchException(e); } From 4e5dfcf07d6e23bca049f5cdbbf6b3b1a3f0ea80 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 6 Mar 2019 15:19:49 +0900 Subject: [PATCH 125/235] logging revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 932a338470..bf244e41d8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -204,9 +204,6 @@ public Iterable> readPartitions(final KeyRange keyRang fileStream.seek(0); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); - LOG.info("HY: metadata: {}", partitionMetadata.toString()); - LOG.info("HY: keyrange: {}", keyRange.toString()); - LOG.info("HY: key: {}", key.toString()); if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; From 296a22ddb0ae0cb213e1f46c574fe32267493b20 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 7 Mar 2019 16:28:36 +0900 Subject: [PATCH 126/235] partition size logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index bf244e41d8..d67939cc72 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,6 +207,7 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; + LOG.info("HY: partition length of the blcok to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { From 9db6292f881e88e769b7a9e2fe6aa3ce7f7cdd92 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 7 Mar 2019 16:57:25 +0900 Subject: [PATCH 127/235] skip logging --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index d67939cc72..ccf2f3efaa 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,12 +207,13 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - LOG.info("HY: partition length of the blcok to read {}", partitionMetadata.getPartitionSize()); + LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); + LOG.info("HY: partition skipped"); } } }catch(Exception e){ From 7aea8b7a75430d22a6e02b4b299bdb81a68af475 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 15:53:47 +0900 Subject: [PATCH 128/235] skipbytes erased --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ccf2f3efaa..31f85408d4 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -212,8 +212,8 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); - LOG.info("HY: partition skipped"); + //skipBytes(fileStream, partitionMetadata.getPartitionSize()); + //LOG.info("HY: partition skipped"); } } }catch(Exception e){ From fe6e5d024feb0b2c6b77d309298e0ab2aa80554c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 16:14:33 +0900 Subject: [PATCH 129/235] revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 31f85408d4..ccf2f3efaa 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -212,8 +212,8 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - //skipBytes(fileStream, partitionMetadata.getPartitionSize()); - //LOG.info("HY: partition skipped"); + skipBytes(fileStream, partitionMetadata.getPartitionSize()); + LOG.info("HY: partition skipped"); } } }catch(Exception e){ From 231cddf5a640602d2e2e5a8d260f543ae3cd80b1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 16:50:39 +0900 Subject: [PATCH 130/235] test --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ccf2f3efaa..31f85408d4 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -212,8 +212,8 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); - LOG.info("HY: partition skipped"); + //skipBytes(fileStream, partitionMetadata.getPartitionSize()); + //LOG.info("HY: partition skipped"); } } }catch(Exception e){ From b1e4ab02c2583fcdadb96711d4fb76ddfb547b0c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 17:53:25 +0900 Subject: [PATCH 131/235] revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 31f85408d4..ccf2f3efaa 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -212,8 +212,8 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - //skipBytes(fileStream, partitionMetadata.getPartitionSize()); - //LOG.info("HY: partition skipped"); + skipBytes(fileStream, partitionMetadata.getPartitionSize()); + LOG.info("HY: partition skipped"); } } }catch(Exception e){ From dd4066b00a77f2c85548f83531b83e68e2007791 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 18:15:32 +0900 Subject: [PATCH 132/235] logging inputstream position --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ccf2f3efaa..d6a67a2a19 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,7 +208,9 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); + LOG.info("HY: inputstream position before read {}", fileStream.position()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + LOG.info("HY: inputstream position after read {}", fileStream.position()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 2215fef0c1df8f41413569e4b9d60c7fe324aad1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 18:48:39 +0900 Subject: [PATCH 133/235] testing substitution of skip with read --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index d6a67a2a19..f05c93d09e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,7 +214,9 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); + byte[] dummyBytes = new byte[partitionMetadata.getPartitionSize()]; + fileStream.read(dummyBytes, 0, partitionMetadata.getPartitionSize()); + //skipBytes(fileStream, partitionMetadata.getPartitionSize()); LOG.info("HY: partition skipped"); } } From bf7447053774b258457b40041c5b88c2a1728171 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 18:53:57 +0900 Subject: [PATCH 134/235] revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f05c93d09e..d6a67a2a19 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -214,9 +214,7 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - byte[] dummyBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(dummyBytes, 0, partitionMetadata.getPartitionSize()); - //skipBytes(fileStream, partitionMetadata.getPartitionSize()); + skipBytes(fileStream, partitionMetadata.getPartitionSize()); LOG.info("HY: partition skipped"); } } From 5842d926ca2cf0cf888faa3a3acc59d4d9d1efaf Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 19:11:37 +0900 Subject: [PATCH 135/235] seek test --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index d6a67a2a19..12067dc33c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -201,7 +201,7 @@ public Iterable> readPartitions(final KeyRange keyRang try { final List> partitionKeyBytesPairs = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ - fileStream.seek(0); + fileStream.seek(40176); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { From fdc2f2bcc129ac2aacf79c9c5697cc965ffab12b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 19:16:48 +0900 Subject: [PATCH 136/235] seek test --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 12067dc33c..816cb06ed2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -201,7 +201,7 @@ public Iterable> readPartitions(final KeyRange keyRang try { final List> partitionKeyBytesPairs = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ - fileStream.seek(40176); + fileStream.seek(100); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { From 6b673a8dc611f75f0b595033dda3c383b146f5c1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 11 Mar 2019 19:22:33 +0900 Subject: [PATCH 137/235] revert --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 816cb06ed2..ff638f3ba8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -201,7 +201,6 @@ public Iterable> readPartitions(final KeyRange keyRang try { final List> partitionKeyBytesPairs = new ArrayList<>(); try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ - fileStream.seek(100); for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { From 836b8ccf47958af5f963e84487dbd0261253b8e1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 12 Mar 2019 10:09:03 +0900 Subject: [PATCH 138/235] capacity set --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index ff638f3ba8..da573395b9 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -200,7 +200,7 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)){ + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())){ for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { From 98f8c62731d98a092ea68f195c9d123e3d1f1e1b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 12 Mar 2019 10:35:42 +0900 Subject: [PATCH 139/235] seek --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index da573395b9..e2a1c21a2b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -207,13 +207,12 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); - LOG.info("HY: inputstream position before read {}", fileStream.position()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - LOG.info("HY: inputstream position after read {}", fileStream.position()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); + fileStream.seek(partitionMetadata.getPartitionSize()); + //skipBytes(fileStream, partitionMetadata.getPartitionSize()); LOG.info("HY: partition skipped"); } } From bf0a06e9e0a5b34df1b446b838a372b14c0f9fca Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 12 Mar 2019 10:40:19 +0900 Subject: [PATCH 140/235] revert --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index e2a1c21a2b..f9596fcb42 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -211,8 +211,7 @@ public Iterable> readPartitions(final KeyRange keyRang partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. - fileStream.seek(partitionMetadata.getPartitionSize()); - //skipBytes(fileStream, partitionMetadata.getPartitionSize()); + skipBytes(fileStream, partitionMetadata.getPartitionSize()); LOG.info("HY: partition skipped"); } } From 5cab2078c608dcc0d1e7020f662fa410037667ac Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 15:28:45 +0900 Subject: [PATCH 141/235] logging partitionbytes --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index f9596fcb42..2466ca5df1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,6 +208,7 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From b7d321662bfbadbd194297d30640e648f584762c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 15:36:28 +0900 Subject: [PATCH 142/235] logging partitionbytes --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 2466ca5df1..edad2d005c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,7 +208,7 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); + if(filePath=="/edge11-9-0") LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 8eb47d96e24a8b0fc73ac236971511358744f15f Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 15:43:39 +0900 Subject: [PATCH 143/235] logging partitionbytes --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index edad2d005c..87c69971b3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,7 +208,7 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - if(filePath=="/edge11-9-0") LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); + if(filePath=="/tmp_crail/files/edge11-9-0") LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From db3ea064fd5270a352027cd0ee96512e5cf1d8d5 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 15:48:50 +0900 Subject: [PATCH 144/235] logging partitionbytes --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 87c69971b3..3332e48459 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,7 +208,7 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - if(filePath=="/tmp_crail/files/edge11-9-0") LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); + if(filePath.equals("/tmp_crail/files/edge11-9-0")) LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 2da390f6a7f6ec5cadc07d28bed125ecd0a0c1fd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 16:08:25 +0900 Subject: [PATCH 145/235] sync test --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 3332e48459..556be795bd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -85,7 +85,6 @@ public CrailFileBlock(final String blockId, try{ this.fs = fs; this.file = fs.lookup(filePath).get().asFile(); - file.syncDir(); LOG.info("HY: {} fetched", blockId); } catch(Exception e2){ From 34d5809c856808cf89212bf4fc469f9c727cfa6b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 16:29:30 +0900 Subject: [PATCH 146/235] logging data read --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 556be795bd..e33186b6d9 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -206,8 +206,10 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); + final NonSerializedPartition data = DataUtil.deserializePartition(partitionBytes.length, serializer, key, new ByteArrayInputStream(partitionBytes)); + LOG.info("HY: data NumEncodedBytes: {}",data.getNumEncodedBytes()); + LOG.info("HY: data SerializedBytes: {}", data.getNumSerializedBytes()); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - if(filePath.equals("/tmp_crail/files/edge11-9-0")) LOG.info("HY: partitionBytes data test:: \n"+ Arrays.toString(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 71383369cbf87d054fc7cb9a3aea816d9f119464 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 16:34:46 +0900 Subject: [PATCH 147/235] logging data read --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index e33186b6d9..4ddb69a9f1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -206,10 +206,10 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); final NonSerializedPartition data = DataUtil.deserializePartition(partitionBytes.length, serializer, key, new ByteArrayInputStream(partitionBytes)); LOG.info("HY: data NumEncodedBytes: {}",data.getNumEncodedBytes()); LOG.info("HY: data SerializedBytes: {}", data.getNumSerializedBytes()); - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 533f34295ae7e82df05a0cebad2fbf5c4c0aff93 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 16:49:47 +0900 Subject: [PATCH 148/235] seek test --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 4ddb69a9f1..6de9e82ca3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -206,9 +206,10 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); + fileStream.seek(1); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); final NonSerializedPartition data = DataUtil.deserializePartition(partitionBytes.length, serializer, key, new ByteArrayInputStream(partitionBytes)); - LOG.info("HY: data NumEncodedBytes: {}",data.getNumEncodedBytes()); + LOG.info("HY: data NumEncodedBytes: {}", data.getNumEncodedBytes()); LOG.info("HY: data SerializedBytes: {}", data.getNumSerializedBytes()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { From 984f249599c6a9782d65d6be40841490b647cf9e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 18:37:39 +0900 Subject: [PATCH 149/235] refactoring --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 6de9e82ca3..7ceb4e035a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -206,11 +206,7 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); - fileStream.seek(1); fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - final NonSerializedPartition data = DataUtil.deserializePartition(partitionBytes.length, serializer, key, new ByteArrayInputStream(partitionBytes)); - LOG.info("HY: data NumEncodedBytes: {}", data.getNumEncodedBytes()); - LOG.info("HY: data SerializedBytes: {}", data.getNumSerializedBytes()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. @@ -251,8 +247,7 @@ public Iterable> readSerializedPartitions(final KeyRange } else { // Deserialize the data final List> partitionsInRange = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(0)) { - fileStream.seek(0); + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { From 00c2ab4f14573b58777468032af03db55ad2d91c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 19:07:02 +0900 Subject: [PATCH 150/235] compression/decompression disabled --- .../pass/compiletime/composite/DefaultCompositePass.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java index 52992164e6..2586156429 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java @@ -40,8 +40,8 @@ public DefaultCompositePass() { new DefaultDataStorePass(), new DefaultDataPersistencePass(), new DefaultScheduleGroupPass(), - new CompressionPass(), - new DecompressionPass(), +// new CompressionPass(), +// new DecompressionPass(), new ResourceLocalityPass(), new ResourceSitePass(), new ResourceSlotPass() From bf3e5a2d80008580bd6bc2a60e1804ce2dff6a54 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 13 Mar 2019 19:28:22 +0900 Subject: [PATCH 151/235] watermark disabled --- .../datatransfer/NemoEventDecoderFactory.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java index 1367e7084b..e002703271 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java @@ -73,13 +73,16 @@ public Object decode() throws IOException { // this is not a watermark return valueDecoder.decode(); } else if (isWatermark == 0x01) { - // this is a watermark - final WatermarkWithIndex watermarkWithIndex = - (WatermarkWithIndex) SerializationUtils.deserialize(inputStream); - return watermarkWithIndex; - } else { - throw new RuntimeException("Watermark decoding failure: " + isWatermark); + return valueDecoder.decode(); } + else return valueDecoder.decode(); + // this is a watermark +// final WatermarkWithIndex watermarkWithIndex = +// (WatermarkWithIndex) SerializationUtils.deserialize(inputStream); +// return watermarkWithIndex; +// } else { +// throw new RuntimeException("Watermark decoding failure: " + isWatermark); +// } } @Override From 6345cbf12bf8a1455be5dc740c3d90caa4db959a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Mar 2019 17:13:00 +0900 Subject: [PATCH 152/235] log --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 7ceb4e035a..5ff5ca2455 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -206,7 +206,8 @@ public Iterable> readPartitions(final KeyRange keyRang // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + int readBytes = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); + LOG.info("HY: readBytes: {}",readBytes); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 98d8f37b4304f2dac5e7d1a6cf940b0ff01f8c36 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Mar 2019 17:29:50 +0900 Subject: [PATCH 153/235] log --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 5ff5ca2455..3a429d05fc 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -208,7 +208,13 @@ public Iterable> readPartitions(final KeyRange keyRang LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); int readBytes = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: readBytes: {}",readBytes); + final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( + partitionBytes.length, serializer, key, + new ByteArrayInputStream(partitionBytes)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + + deserializePartition.getData().forEach(data -> + LOG.info("deser {}", data)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); From 5fe266f027ba8119f7da626cd7b7247274d2d816 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 14 Mar 2019 17:44:46 +0900 Subject: [PATCH 154/235] log --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 3 +-- .../apache/nemo/runtime/executor/data/block/FileBlock.java | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 3a429d05fc..e40550e751 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -211,10 +211,9 @@ public Iterable> readPartitions(final KeyRange keyRang final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( partitionBytes.length, serializer, key, new ByteArrayInputStream(partitionBytes)); - partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - deserializePartition.getData().forEach(data -> LOG.info("deser {}", data)); + partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index a833100bfd..e90ac9567c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -187,6 +187,11 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); + final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( + partitionBytes.length, serializer, key, + new ByteArrayInputStream(partitionBytes)); + deserializePartition.getData().forEach(data -> + LOG.info("deser {}", data)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); From cbc289ba13549724561192f332bc54538106499a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 17 Mar 2019 18:18:30 +0900 Subject: [PATCH 155/235] revert watermark & Compression/Decompression disable --- .../composite/DefaultCompositePass.java | 4 ++-- .../datatransfer/NemoEventDecoderFactory.java | 15 ++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java index 2586156429..52992164e6 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/composite/DefaultCompositePass.java @@ -40,8 +40,8 @@ public DefaultCompositePass() { new DefaultDataStorePass(), new DefaultDataPersistencePass(), new DefaultScheduleGroupPass(), -// new CompressionPass(), -// new DecompressionPass(), + new CompressionPass(), + new DecompressionPass(), new ResourceLocalityPass(), new ResourceSitePass(), new ResourceSlotPass() diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java index e002703271..1367e7084b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/NemoEventDecoderFactory.java @@ -73,16 +73,13 @@ public Object decode() throws IOException { // this is not a watermark return valueDecoder.decode(); } else if (isWatermark == 0x01) { - return valueDecoder.decode(); - } - else return valueDecoder.decode(); // this is a watermark -// final WatermarkWithIndex watermarkWithIndex = -// (WatermarkWithIndex) SerializationUtils.deserialize(inputStream); -// return watermarkWithIndex; -// } else { -// throw new RuntimeException("Watermark decoding failure: " + isWatermark); -// } + final WatermarkWithIndex watermarkWithIndex = + (WatermarkWithIndex) SerializationUtils.deserialize(inputStream); + return watermarkWithIndex; + } else { + throw new RuntimeException("Watermark decoding failure: " + isWatermark); + } } @Override From 8012c2b858315ec0970c0f1e6f536efcab56e600 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 14:16:52 +0900 Subject: [PATCH 156/235] policy edit --- .../org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java index 8336de7d5b..14312f3d37 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java @@ -34,7 +34,6 @@ public final class CrailPolicy implements Policy { public static final PolicyBuilder BUILDER = new PolicyBuilder() .registerCompileTimePass(new CrailEdgeDataStorePass()) //***확인 - .registerCompileTimePass(new LoopOptimizationCompositePass()) .registerCompileTimePass(new DefaultCompositePass()); private final Policy policy; From 42053793b47418583ca11d5fcef732e3159bb2bb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 14:34:09 +0900 Subject: [PATCH 157/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 80e83df4e2..26fd557dc8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -91,6 +91,7 @@ public static NonSerializedPartition deserializePartiti final InputStreamIterator iterator = new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); iterator.forEachRemaining(deserializedData::add); + deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), iterator.getNumEncodedBytes()); } From e76d33d22dfa9c2c55674122089f82b1c0fb6e4f Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 15:07:11 +0900 Subject: [PATCH 158/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 26fd557dc8..15e1114320 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -91,6 +91,7 @@ public static NonSerializedPartition deserializePartiti final InputStreamIterator iterator = new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); iterator.forEachRemaining(deserializedData::add); + iterator.forEachRemaining(data -> LOG.info("iterator check")); deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), iterator.getNumEncodedBytes()); From 3d18ce9e6ec1934bdcc32987196c52d4da35d04e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 15:15:11 +0900 Subject: [PATCH 159/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 15e1114320..5b56ddff4a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -29,6 +29,7 @@ import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import sun.reflect.annotation.ExceptionProxy; import java.io.*; import java.util.*; @@ -96,6 +97,10 @@ public static NonSerializedPartition deserializePartiti return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), iterator.getNumEncodedBytes()); } + catch(Exception e){ + e.printStackTrace(); + return null; + } } /** From dc28f5dfa4e5a7009914439dcf667c3a70e11326 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 15:38:09 +0900 Subject: [PATCH 160/235] unable limitedInputStream --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 5b56ddff4a..c904e06c56 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -90,7 +90,7 @@ public static NonSerializedPartition deserializePartiti // reading input from chained compression InputStream. try (final LimitedInputStream limitedInputStream = new LimitedInputStream(inputStream, partitionSize)) { final InputStreamIterator iterator = - new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); + new InputStreamIterator(Collections.singletonList(inputStream).iterator(), serializer); iterator.forEachRemaining(deserializedData::add); iterator.forEachRemaining(data -> LOG.info("iterator check")); deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); From 77d8dbadcf58cd894981bbebba265828668917f7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 15:43:10 +0900 Subject: [PATCH 161/235] revert unable LimitedInputStream --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index c904e06c56..5b56ddff4a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -90,7 +90,7 @@ public static NonSerializedPartition deserializePartiti // reading input from chained compression InputStream. try (final LimitedInputStream limitedInputStream = new LimitedInputStream(inputStream, partitionSize)) { final InputStreamIterator iterator = - new InputStreamIterator(Collections.singletonList(inputStream).iterator(), serializer); + new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); iterator.forEachRemaining(deserializedData::add); iterator.forEachRemaining(data -> LOG.info("iterator check")); deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); From fdd36db22258401b51cc2caa87b0a7dda6f6edb7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 15:46:11 +0900 Subject: [PATCH 162/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 5b56ddff4a..9c182e85c1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -91,7 +91,7 @@ public static NonSerializedPartition deserializePartiti try (final LimitedInputStream limitedInputStream = new LimitedInputStream(inputStream, partitionSize)) { final InputStreamIterator iterator = new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); - iterator.forEachRemaining(deserializedData::add); + //iterator.forEachRemaining(deserializedData::add); iterator.forEachRemaining(data -> LOG.info("iterator check")); deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), From 0fe8691bed631047f070cf4025f58bac2ce290d2 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 16:15:49 +0900 Subject: [PATCH 163/235] testing w/o compression --- .../optimizer/pass/compiletime/annotating/CompressionPass.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java index 8ac538a3d9..07d92cae32 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java @@ -35,7 +35,7 @@ public final class CompressionPass extends AnnotatingPass { * Default constructor. Uses LZ4 as default. */ public CompressionPass() { - this(CompressionProperty.Value.LZ4); + this(CompressionProperty.Value.None); } /** From 260f423df4106d44666b93c135d0825a92fd4782 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 16:22:05 +0900 Subject: [PATCH 164/235] revert iterator check --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 9c182e85c1..ccbb120c9a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -91,8 +91,8 @@ public static NonSerializedPartition deserializePartiti try (final LimitedInputStream limitedInputStream = new LimitedInputStream(inputStream, partitionSize)) { final InputStreamIterator iterator = new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); - //iterator.forEachRemaining(deserializedData::add); - iterator.forEachRemaining(data -> LOG.info("iterator check")); + iterator.forEachRemaining(deserializedData::add); + //iterator.forEachRemaining(data -> LOG.info("iterator check")); deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), iterator.getNumEncodedBytes()); From c74630e37b3d82b49dc21d740b7435a66fe754bb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 16:36:39 +0900 Subject: [PATCH 165/235] able compression --- .../optimizer/pass/compiletime/annotating/CompressionPass.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java index 07d92cae32..8ac538a3d9 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CompressionPass.java @@ -35,7 +35,7 @@ public final class CompressionPass extends AnnotatingPass { * Default constructor. Uses LZ4 as default. */ public CompressionPass() { - this(CompressionProperty.Value.None); + this(CompressionProperty.Value.LZ4); } /** From 75cef7a6ea758216446bb0ece9f815110e603b2a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 16:46:43 +0900 Subject: [PATCH 166/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index ccbb120c9a..1878552f69 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -253,8 +253,10 @@ public boolean hasNext() { encodedCountingStream = new CountingInputStream(buildInputStream( serializedCountingStream, serializer.getDecodeStreamChainers())); decoder = serializer.getDecoderFactory().create(encodedCountingStream); + LOG.info("Got decoder"); } else { cannotContinueDecoding = true; + LOG.info("Cannot continue decoding"); return false; } } @@ -263,6 +265,7 @@ public boolean hasNext() { throw new RuntimeException(e); } try { + LOG.info("Entered decoding"); next = decoder.decode(); hasNext = true; return true; From a308f1f1e7ecfba6358f47d77a2d5e96a92d1147 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 16:53:05 +0900 Subject: [PATCH 167/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 1878552f69..4bda260a08 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -265,9 +265,9 @@ public boolean hasNext() { throw new RuntimeException(e); } try { - LOG.info("Entered decoding"); next = decoder.decode(); hasNext = true; + LOG.info("decoded"); return true; } catch (final IOException e) { // IOException from decoder indicates EOF event. From 6e9b9ae31db7bda4358ab90675d1ee6f4b3a8199 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 17:20:03 +0900 Subject: [PATCH 168/235] logging --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 4bda260a08..bcb5ce54b8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -270,6 +270,7 @@ public boolean hasNext() { LOG.info("decoded"); return true; } catch (final IOException e) { + LOG.info("IOException", e); // IOException from decoder indicates EOF event. numSerializedBytes += serializedCountingStream.getCount(); numEncodedBytes += encodedCountingStream.getCount(); From 83bb13cc22553cf5297f8b3d51a9a04b7d1b38f8 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 18:47:36 +0900 Subject: [PATCH 169/235] logging --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index e40550e751..887a64e8c7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -106,6 +106,7 @@ private void writeToFile(final Iterable> serializedPartit for(final SerializedPartition serializedPartition : serializedPartitions){ metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData()); + LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); } fileOutputStream.close(); } From 5fbc237213fc26cb319a42e350c4d73880d93432 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 18:53:41 +0900 Subject: [PATCH 170/235] serialized partition length --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 887a64e8c7..1750028a13 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -104,7 +104,7 @@ public CrailFileBlock(final String blockId, private void writeToFile(final Iterable> serializedPartitions) throws Exception { final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ - metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); + metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getData().length); fileOutputStream.write(serializedPartition.getData()); LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); } From 234af78341c2f3a43c10bb36a2161a7cf3f5d964 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 18 Mar 2019 20:19:06 +0900 Subject: [PATCH 171/235] logging erased --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 3 --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 5 ----- 2 files changed, 8 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index bcb5ce54b8..a6bcca3eb1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -253,10 +253,8 @@ public boolean hasNext() { encodedCountingStream = new CountingInputStream(buildInputStream( serializedCountingStream, serializer.getDecodeStreamChainers())); decoder = serializer.getDecoderFactory().create(encodedCountingStream); - LOG.info("Got decoder"); } else { cannotContinueDecoding = true; - LOG.info("Cannot continue decoding"); return false; } } @@ -267,7 +265,6 @@ public boolean hasNext() { try { next = decoder.decode(); hasNext = true; - LOG.info("decoded"); return true; } catch (final IOException e) { LOG.info("IOException", e); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 1750028a13..782adaa1a1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -209,11 +209,6 @@ public Iterable> readPartitions(final KeyRange keyRang LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); int readBytes = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); LOG.info("HY: readBytes: {}",readBytes); - final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( - partitionBytes.length, serializer, key, - new ByteArrayInputStream(partitionBytes)); - deserializePartition.getData().forEach(data -> - LOG.info("deser {}", data)); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. From 6ecbb6339673c803d8e653c853e77fba1aa2e471 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 16:10:16 +0900 Subject: [PATCH 172/235] new getCount() testing --- .../org/apache/nemo/common/DirectByteArrayOutputStream.java | 2 +- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java b/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java index 5d5600d326..e49b900899 100644 --- a/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java +++ b/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java @@ -52,6 +52,6 @@ public byte[] getBufDirectly() { * @return the number of valid bytes in the buffer. */ public int getCount() { - return count; + return size(); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 782adaa1a1..115711e690 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -104,7 +104,7 @@ public CrailFileBlock(final String blockId, private void writeToFile(final Iterable> serializedPartitions) throws Exception { final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ - metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getData().length); + metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData()); LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); } From 865ef6f4165244d6aa76bb3784528481187c1d27 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 18:18:58 +0900 Subject: [PATCH 173/235] partition length debugging --- .../runtime/executor/data/partition/SerializedPartition.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java index 71541c2b2a..217373abfa 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java @@ -117,7 +117,8 @@ public void commit() throws IOException { wrappedStream.close(); this.serializedData = bytesOutputStream.getBufDirectly(); - this.length = bytesOutputStream.getCount(); + //this.length = bytesOutputStream.getCount(); + this.length = serializedData.length; this.committed = true; } } From e449ebc6b95432c5396adcf461793e797d18eccd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 18:25:56 +0900 Subject: [PATCH 174/235] partition length debugging --- .../org/apache/nemo/common/DirectByteArrayOutputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java b/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java index e49b900899..5d5600d326 100644 --- a/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java +++ b/common/src/main/java/org/apache/nemo/common/DirectByteArrayOutputStream.java @@ -52,6 +52,6 @@ public byte[] getBufDirectly() { * @return the number of valid bytes in the buffer. */ public int getCount() { - return size(); + return count; } } From e8b62e0a8bdcd8a75370f6e08c3923d2e213a769 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 18:40:56 +0900 Subject: [PATCH 175/235] partition length debugging --- .../executor/data/partition/SerializedPartition.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java index 217373abfa..d1450412e3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java @@ -115,10 +115,10 @@ public void commit() throws IOException { // We need to close wrappedStream on here, because DirectByteArrayOutputStream:getBufDirectly() returns // inner buffer directly, which can be an unfinished(not flushed) buffer. wrappedStream.close(); - this.serializedData = bytesOutputStream.getBufDirectly(); + this.serializedData = bytesOutputStream.toByteArray(); - //this.length = bytesOutputStream.getCount(); - this.length = serializedData.length; + this.length = bytesOutputStream.getCount(); + //this.length = serializedData.length; this.committed = true; } } From e828a6bf8f32dd3386782c70099ef4503c860a64 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 19:10:25 +0900 Subject: [PATCH 176/235] replace to java native methods --- .../runtime/executor/data/partition/SerializedPartition.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java index d1450412e3..6dcc780662 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java @@ -116,9 +116,7 @@ public void commit() throws IOException { // inner buffer directly, which can be an unfinished(not flushed) buffer. wrappedStream.close(); this.serializedData = bytesOutputStream.toByteArray(); - - this.length = bytesOutputStream.getCount(); - //this.length = serializedData.length; + this.length = bytesOutputStream.size(); this.committed = true; } } From abca84732898886fe32601e8d2db0b6481348aeb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 19:37:24 +0900 Subject: [PATCH 177/235] logging --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index ab1f018962..49df67366a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -161,6 +161,7 @@ public static CrailFileMetadata open(final String me partitionMetadataList.add(partitionMetadata); } } catch (Exception e) { + e.printStackTrace(); throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); From 62cc257ea36d4054ccfd3cbb298c83894031baf0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Thu, 21 Mar 2019 19:46:30 +0900 Subject: [PATCH 178/235] logging erased --- .../nemo/runtime/executor/data/metadata/CrailFileMetadata.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 49df67366a..ab1f018962 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -161,7 +161,6 @@ public static CrailFileMetadata open(final String me partitionMetadataList.add(partitionMetadata); } } catch (Exception e) { - e.printStackTrace(); throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); From 26bd24f56b880d50b803b1cc962ac537414b3a4d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 24 Mar 2019 15:15:38 +0900 Subject: [PATCH 179/235] metadata parameter added --- .../runtime/executor/data/metadata/CrailFileMetadata.java | 4 ++-- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index ab1f018962..7b6f5c073a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -46,7 +46,7 @@ public final class CrailFileMetadata extends FileMetadat private final String metaFilePath; private static CrailConfiguration conf; private static CrailStore fs; - private static CrailFile file=null; + /** * Constructor for creating a non-committed new file metadata. * @@ -141,7 +141,7 @@ public static CrailFileMetadata create(final String * @return the created block metadata. * @throws IOException if fail to open. */ - public static CrailFileMetadata open(final String metaFilePath) throws Exception{ + public static CrailFileMetadata open(final String metaFilePath, CrailStore fs) throws Exception{ LOG.info("HY: metafilePath {}", metaFilePath); final List> partitionMetadataList = new ArrayList<>(); try { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index bd3f150f22..80697406d0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -175,7 +175,7 @@ private CrailFileBlock getBlockFromFile(final String final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final CrailFileMetadata metadata = - CrailFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory)); + CrailFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory), fs); return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); } } From 8aa0a20c14599f700043324ec4705174e3edd040 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 24 Mar 2019 15:28:08 +0900 Subject: [PATCH 180/235] metadata CrailStore and conf minimized for test --- .../data/metadata/CrailFileMetadata.java | 27 +++++-------------- .../executor/data/stores/CrailFileStore.java | 2 +- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 7b6f5c073a..392806410e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -44,7 +44,6 @@ public final class CrailFileMetadata extends FileMetadata { private static final Logger LOG = LoggerFactory.getLogger(CrailFileMetadata.class.getName()); private final String metaFilePath; - private static CrailConfiguration conf; private static CrailStore fs; /** @@ -52,15 +51,10 @@ public final class CrailFileMetadata extends FileMetadat * * @param metaFilePath the metadata file path. */ - private CrailFileMetadata(final String metaFilePath) { + private CrailFileMetadata(final String metaFilePath, CrailStore fs) { super(); this.metaFilePath = metaFilePath; - try { - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); - }catch(Exception e){ - LOG.info("HY: CrailConfiguration failed"); - } + this.fs = fs; } /** @@ -70,15 +64,10 @@ private CrailFileMetadata(final String metaFilePath) { * @param partitionMetadataList the partition metadata list. */ private CrailFileMetadata(final String metaFilePath, - final List> partitionMetadataList) { + final List> partitionMetadataList, CrailStore fs) { super(partitionMetadataList); this.metaFilePath = metaFilePath; - try { - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); - }catch(Exception e){ - LOG.info("HY: CrailConfiguration failed"); - } + this.fs = fs; } /** @@ -103,8 +92,6 @@ public synchronized void commitBlock() throws IOException { LOG.info("HY: metadata commit for block {}", metaFilePath); final Iterable> partitionMetadataItr = getPartitionMetadataList(); try{ - conf = new CrailConfiguration(); - fs = CrailStore.newInstance(conf); CrailBufferedOutputStream metaFileOutputstream =fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile().getBufferedOutputStream(0); for (PartitionMetadata partitionMetadata : partitionMetadataItr) { final byte[] key = SerializationUtils.serialize(partitionMetadata.getKey()); @@ -129,8 +116,8 @@ public synchronized void commitBlock() throws IOException { * @param the key type of the block's partitions. * @return the created block metadata. */ - public static CrailFileMetadata create(final String metaFilePath) { - return new CrailFileMetadata<>(metaFilePath); + public static CrailFileMetadata create(final String metaFilePath, CrailStore fs) { + return new CrailFileMetadata<>(metaFilePath, fs); } /** @@ -163,6 +150,6 @@ public static CrailFileMetadata open(final String me } catch (Exception e) { throw new IOException("HY: File "+metaFilePath+ " does not exist!"); } - return new CrailFileMetadata<>(metaFilePath, partitionMetadataList); + return new CrailFileMetadata<>(metaFilePath, partitionMetadataList, fs); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 80697406d0..2dad9e0a0f 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -80,7 +80,7 @@ public Block createBlock(final String blockId) { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); - final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath); + final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath, fs); return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); } From 48a25bef9477cdfc18767025a034ed47c137d966 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 25 Mar 2019 14:57:52 +0900 Subject: [PATCH 181/235] bugfix --- .../apache/nemo/runtime/executor/data/block/CrailFileBlock.java | 2 +- .../runtime/executor/data/partition/SerializedPartition.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 115711e690..60615eae09 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -105,7 +105,7 @@ private void writeToFile(final Iterable> serializedPartit final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); for(final SerializedPartition serializedPartition : serializedPartitions){ metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); - fileOutputStream.write(serializedPartition.getData()); + fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); } fileOutputStream.close(); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java index 6dcc780662..06fb5c97b7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java @@ -115,7 +115,7 @@ public void commit() throws IOException { // We need to close wrappedStream on here, because DirectByteArrayOutputStream:getBufDirectly() returns // inner buffer directly, which can be an unfinished(not flushed) buffer. wrappedStream.close(); - this.serializedData = bytesOutputStream.toByteArray(); + this.serializedData = bytesOutputStream.getBufDirectly(); this.length = bytesOutputStream.size(); this.committed = true; } From ab4cdf8732e0b99e11537902744e3f06bdc2ce41 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 29 Apr 2019 18:49:17 +0900 Subject: [PATCH 182/235] refactor comments --- .../executor/data/block/CrailFileBlock.java | 2 +- .../data/metadata/CrailFileMetadata.java | 11 +++++----- .../executor/data/stores/CrailFileStore.java | 22 +++++++++---------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 60615eae09..eba27e75bf 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -39,7 +39,7 @@ import java.util.*; /** - * This class represents a block which is stored in (local or remote) file. + * This class represents a block which is stored in CrailStore. * Concurrent read is supported, but concurrent write is not supported. * * @param the key type of its partitions. diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 392806410e..2648df6a8e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -34,10 +34,10 @@ import java.util.List; /** - * This class represents a metadata for a remote file block. - * Because the data is stored in a remote file and globally accessed by multiple nodes, + * This class represents a metadata for a file block in CrailFileSystem. + * Because the data is stored in a CrailFileSystem and globally accessed by multiple nodes, * each read, or deletion for a block needs one instance of this metadata. - * The metadata is store in and read from a file (after a remote file block is committed). + * The metadata is stored in and read from a CrailFile (after a CrailFile block is committed). * @param the key type of its partitions. */ @ThreadSafe @@ -47,7 +47,7 @@ public final class CrailFileMetadata extends FileMetadat private static CrailStore fs; /** - * Constructor for creating a non-committed new file metadata. + * Constructor for creating a non-committed new CrailFile metadata. * * @param metaFilePath the metadata file path. */ @@ -58,10 +58,11 @@ private CrailFileMetadata(final String metaFilePath, CrailStore fs) { } /** - * Constructor for opening a existing file metadata. + * Constructor for opening a existing CrailFile metadata. * * @param metaFilePath the metadata file path. * @param partitionMetadataList the partition metadata list. + * @param fs the CrailStore instance. */ private CrailFileMetadata(final String metaFilePath, final List> partitionMetadataList, CrailStore fs) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 2dad9e0a0f..1728cd525b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -44,11 +44,11 @@ import java.util.Optional; /** - * Stores blocks in a mounted GlusterFS volume. - * Because the data is stored in remote files and globally accessed by multiple nodes, - * each read, or deletion for a file needs one instance of {@link FileBlock}. - * When a remote file block is created, it's metadata is maintained in memory until the block is committed. - * After the block is committed, the metadata is store in and read from a file. + * Stores blocks in CrailStore. + * Since the data is stored in CrailStore and globally accessed by multiple nodes, + * each read, or deletion for a file needs one instance of {@link CrailFileBlock}. + * When CrailFileBlock is created, it's metadata is maintained in memory until the block is committed. + * After the block is committed, the metadata is stored in and read from a CrailStore. */ @ThreadSafe public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { @@ -60,7 +60,7 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFi /** * Constructor. * - * //@param volumeDirectory the remote volume directory which will contain the files. + * @param volumeDirectory the CrailStore directory which will contain the files. * @param jobId the job id. * @param serializerManager the serializer manager. */ @@ -99,7 +99,7 @@ public void writeBlock(final Block block) throws BlockWriteException { } else if (!block.isCommitted()) { throw new BlockWriteException(new Throwable("The block " + block.getId() + "is not committed yet.")); } - // Do nothing. The block have to be written in the remote file during commit. + // Do nothing. The block have to be written in CrailStore file during commit. } /** @@ -161,14 +161,14 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ } /** - * Gets a {@link FileBlock} from the block and it's metadata file. - * Because the data is stored in remote files and globally accessed by multiple nodes, - * each read, or deletion for a file needs one instance of {@link FileBlock}, + * Gets a {@link CrailFileBlock} from the block and it's metadata file. + * Because the data is stored in CrailStore and globally accessed by multiple nodes, + * each read, or deletion for a file needs one instance of {@link CrailFileBlock}, * and the temporary block will not be maintained by this executor. * * @param blockId the ID of the block to get. * @param the type of the key of the block. - * @return the {@link FileBlock} gotten. + * @return the {@link CrailFileBlock} gotten. * @throws IOException if fail to get. */ private CrailFileBlock getBlockFromFile(final String blockId) throws Exception { From 1bf866e9012711a08387db65f17d9e1cb01e266c Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 30 Apr 2019 10:01:15 +0900 Subject: [PATCH 183/235] checkstyle and few comments --- .../executor/data/block/CrailFileBlock.java | 45 ++++++++++--------- .../data/metadata/CrailFileMetadata.java | 28 ++++++------ .../executor/data/stores/CrailFileStore.java | 22 ++++----- 3 files changed, 47 insertions(+), 48 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index eba27e75bf..9c69ef8706 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -45,15 +45,15 @@ * @param the key type of its partitions. */ @NotThreadSafe -public final class CrailFileBlock implements Block{ +public final class CrailFileBlock implements Block { private static final Logger LOG = LoggerFactory.getLogger(CrailFileBlock.class.getName()); private final String id; private final Map> nonCommittedPartitionsMap; private final Serializer serializer; private final String filePath; private final FileMetadata metadata; - CrailStore fs = null; - CrailFile file = null; + private CrailStore fs = null; + private CrailFile file = null; /** * Constructor. @@ -69,7 +69,7 @@ public CrailFileBlock(final String blockId, final Serializer serializer, final String filePath, final FileMetadata metadata, - CrailStore fs) { + final CrailStore fs) { this.id = blockId; this.nonCommittedPartitionsMap = new HashMap<>(); this.serializer = serializer; @@ -78,16 +78,17 @@ public CrailFileBlock(final String blockId, try { LOG.info("HY: FileBlock entered"); this.fs = fs; - this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile(); + this.file = + fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) + .get().asFile(); file.syncDir(); LOG.info("HY: crail file block created"); } catch (Exception e1) { - try{ + try { this.fs = fs; this.file = fs.lookup(filePath).get().asFile(); LOG.info("HY: {} fetched", blockId); - } - catch(Exception e2){ + } catch (Exception e2) { LOG.info("HY: {} fetch failed"); } } @@ -99,11 +100,11 @@ public CrailFileBlock(final String blockId, * Invariant: This method does not support concurrent write. * * @param serializedPartitions the iterable of the serialized partitions to write. - * @throws IOException if fail to write. + * @throws Exception if fail to write. */ private void writeToFile(final Iterable> serializedPartitions) throws Exception { final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); - for(final SerializedPartition serializedPartition : serializedPartitions){ + for (final SerializedPartition serializedPartition : serializedPartitions) { metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); @@ -200,7 +201,7 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())){ + try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())) { for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { @@ -208,7 +209,7 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); int readBytes = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - LOG.info("HY: readBytes: {}",readBytes); + LOG.info("HY: readBytes: {}", readBytes); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. @@ -216,9 +217,9 @@ public Iterable> readPartitions(final KeyRange keyRang LOG.info("HY: partition skipped"); } } - }catch(Exception e){ + } catch (Exception e) { e.printStackTrace(); - } + } for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( @@ -266,9 +267,9 @@ public Iterable> readSerializedPartitions(final KeyRange skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } - }catch (final IOException e) { + } catch (final IOException e) { throw new BlockFetchException(e); - } catch (final Exception e2){ + } catch (final Exception e2) { e2.printStackTrace(); } @@ -325,12 +326,12 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { public void deleteFile() throws IOException { metadata.deleteMetadata(); try { - if(fs.lookup(filePath).get()!=null) + if (fs.lookup(filePath).get() != null) { fs.delete(filePath, true); - }catch (IOException e){ + } + } catch (IOException e) { e.printStackTrace(); - } - catch (Exception e){ + } catch (Exception e) { LOG.info("HY: deleteFile failed"); e.printStackTrace(); } @@ -393,7 +394,9 @@ public synchronized void commitPartitions() throws BlockWriteException { * @return the ID of this block. */ @Override - public String getId() { return id; } + public String getId() { + return id; + } /** * @return whether this block is committed or not. diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 2648df6a8e..d7c2afb67d 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -20,16 +20,11 @@ import org.apache.commons.lang3.SerializationUtils; import org.apache.crail.*; -import org.apache.crail.conf.CrailConfiguration; -import org.apache.nemo.common.exception.BlockFetchException; -import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.concurrent.ThreadSafe; import java.io.*; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; @@ -50,8 +45,9 @@ public final class CrailFileMetadata extends FileMetadat * Constructor for creating a non-committed new CrailFile metadata. * * @param metaFilePath the metadata file path. + * @param fs the CrailStore instance. */ - private CrailFileMetadata(final String metaFilePath, CrailStore fs) { + private CrailFileMetadata(final String metaFilePath, final CrailStore fs) { super(); this.metaFilePath = metaFilePath; this.fs = fs; @@ -65,7 +61,7 @@ private CrailFileMetadata(final String metaFilePath, CrailStore fs) { * @param fs the CrailStore instance. */ private CrailFileMetadata(final String metaFilePath, - final List> partitionMetadataList, CrailStore fs) { + final List> partitionMetadataList, final CrailStore fs) { super(partitionMetadataList); this.metaFilePath = metaFilePath; this.fs = fs; @@ -92,8 +88,10 @@ public void deleteMetadata() throws IOException { public synchronized void commitBlock() throws IOException { LOG.info("HY: metadata commit for block {}", metaFilePath); final Iterable> partitionMetadataItr = getPartitionMetadataList(); - try{ - CrailBufferedOutputStream metaFileOutputstream =fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().asFile().getBufferedOutputStream(0); + try { + CrailBufferedOutputStream metaFileOutputstream = + fs.create(metaFilePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) + .get().asFile().getBufferedOutputStream(0); for (PartitionMetadata partitionMetadata : partitionMetadataItr) { final byte[] key = SerializationUtils.serialize(partitionMetadata.getKey()); metaFileOutputstream.writeInt(key.length); @@ -102,8 +100,7 @@ public synchronized void commitBlock() throws IOException { metaFileOutputstream.writeLong(partitionMetadata.getOffset()); } metaFileOutputstream.close(); - } - catch(Exception e){ + } catch (Exception e) { LOG.info("HY: CrailBufferedOutputStream exception occurred"); e.printStackTrace(); } @@ -114,10 +111,11 @@ public synchronized void commitBlock() throws IOException { * Creates a new block metadata. * * @param metaFilePath the path of the file to write metadata. + * @param fs the CrailStore instance. * @param the key type of the block's partitions. * @return the created block metadata. */ - public static CrailFileMetadata create(final String metaFilePath, CrailStore fs) { + public static CrailFileMetadata create(final String metaFilePath, final CrailStore fs) { return new CrailFileMetadata<>(metaFilePath, fs); } @@ -125,11 +123,13 @@ public static CrailFileMetadata create(final String * Opens a existing block metadata in file. * * @param metaFilePath the path of the file to write metadata. + * @param fs the CrailStore instance * @param the key type of the block's partitions. * @return the created block metadata. * @throws IOException if fail to open. */ - public static CrailFileMetadata open(final String metaFilePath, CrailStore fs) throws Exception{ + public static CrailFileMetadata open(final String metaFilePath, + final CrailStore fs) throws IOException { LOG.info("HY: metafilePath {}", metaFilePath); final List> partitionMetadataList = new ArrayList<>(); try { @@ -149,7 +149,7 @@ public static CrailFileMetadata open(final String me partitionMetadataList.add(partitionMetadata); } } catch (Exception e) { - throw new IOException("HY: File "+metaFilePath+ " does not exist!"); + throw new IOException("HY: File " + metaFilePath + " does not exist!"); } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList, fs); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 1728cd525b..f46654a4e5 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -27,18 +27,13 @@ import org.apache.nemo.runtime.executor.data.block.Block; import org.apache.nemo.runtime.executor.data.block.CrailFileBlock; import org.apache.nemo.runtime.executor.data.metadata.CrailFileMetadata; -import org.apache.nemo.runtime.executor.data.metadata.FileMetadata; -import org.apache.nemo.runtime.executor.data.metadata.LocalFileMetadata; import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; -import org.apache.nemo.runtime.executor.data.metadata.RemoteFileMetadata; -import org.apache.nemo.runtime.executor.data.block.FileBlock; import org.apache.reef.tang.annotations.Parameter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; -import java.io.File; import java.io.IOException; import java.io.Serializable; import java.util.Optional; @@ -63,6 +58,7 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFi * @param volumeDirectory the CrailStore directory which will contain the files. * @param jobId the job id. * @param serializerManager the serializer manager. + * @throws Exception for any error occurred while trying to set Crail requirements. */ @Inject private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final String volumeDirectory, @@ -115,7 +111,7 @@ public void writeBlock(final Block block) throws BlockWriteException { public Optional readBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - if (fs.lookup(filePath).get()==null) { + if (fs.lookup(filePath).get() == null) { return Optional.empty(); } else { try { @@ -123,7 +119,7 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio return Optional.of(block); } catch (final IOException e) { throw new BlockFetchException(e); - } catch (Exception e){ + } catch (Exception e) { e.printStackTrace(); throw new BlockFetchException(e); } @@ -141,11 +137,11 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio * @return whether the block exists or not. */ @Override - public boolean deleteBlock(final String blockId) throws BlockFetchException{ + public boolean deleteBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { - if (fs.lookup(filePath).get()!=null) { + if (fs.lookup(filePath).get() != null) { final CrailFileBlock block = getBlockFromFile(blockId); block.deleteFile(); return true; @@ -154,9 +150,9 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ } } catch (final IOException e) { throw new BlockFetchException(e); - } catch (Exception e){ - e.printStackTrace(); - throw new BlockFetchException(e); + } catch (final Exception e) { + e.printStackTrace(); + throw new BlockFetchException(e); } } @@ -169,7 +165,7 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException{ * @param blockId the ID of the block to get. * @param the type of the key of the block. * @return the {@link CrailFileBlock} gotten. - * @throws IOException if fail to get. + * @throws Exception if fail to get. */ private CrailFileBlock getBlockFromFile(final String blockId) throws Exception { final Serializer serializer = getSerializerFromWorker(blockId); From 1d5a6b1804047d219556df9e880594b4751f1010 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 2 Jun 2019 12:12:32 +0900 Subject: [PATCH 184/235] comments trimmed --- .../org/apache/nemo/driver/NemoDriver.java | 12 ++++------- .../executor/data/block/CrailFileBlock.java | 20 ++++++------------- .../executor/data/block/FileBlock.java | 5 ----- .../data/metadata/CrailFileMetadata.java | 8 +++----- .../executor/data/stores/CrailFileStore.java | 2 +- .../datatransfer/BlockOutputWriter.java | 1 - 6 files changed, 14 insertions(+), 34 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 0b9c40045e..b4387722a1 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -158,20 +158,16 @@ public void onNext(final StartTime startTime) { try { conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); - - LOG.info("creating main dir /tmp_crail"); - try{ + try{ fs.delete("/tmp_crail", true).get().syncDir(); - } - catch(Exception e){ + } catch(Exception e){ LOG.info("failed to delete /tmp_crail"); - } + } fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); fs.create("/tmp_crail/files", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - LOG.info("creating main dir done"); } catch(Exception e){ - LOG.info("HY: Error occurred during driver crail main dir setup"); + LOG.info("Failed to create Crail directory"); e.printStackTrace(); } runtimeMaster.requestContainer(resourceSpecificationString); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 9c69ef8706..1c1ba15a22 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -76,20 +76,17 @@ public CrailFileBlock(final String blockId, this.filePath = filePath; this.metadata = metadata; try { - LOG.info("HY: FileBlock entered"); this.fs = fs; - this.file = - fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) - .get().asFile(); + this.file = fs.create(filePath, CrailNodeType.DATAFILE, + CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) + .get().asFile(); file.syncDir(); - LOG.info("HY: crail file block created"); } catch (Exception e1) { try { this.fs = fs; this.file = fs.lookup(filePath).get().asFile(); - LOG.info("HY: {} fetched", blockId); } catch (Exception e2) { - LOG.info("HY: {} fetch failed"); + LOG.info("{} fetch failed", blockId); } } } @@ -107,7 +104,6 @@ private void writeToFile(final Iterable> serializedPartit for (final SerializedPartition serializedPartition : serializedPartitions) { metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); - LOG.info(String.format("HY: Expected write = %d, actual write = %d", serializedPartition.getLength(), serializedPartition.getData().length)); } fileOutputStream.close(); } @@ -207,14 +203,11 @@ public Iterable> readPartitions(final KeyRange keyRang if (keyRange.includes(key)) { // The key value of this partition is in the range. final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - LOG.info("HY: partition length of the block to read {}", partitionMetadata.getPartitionSize()); - int readBytes = fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - LOG.info("HY: readBytes: {}", readBytes); + fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); - LOG.info("HY: partition skipped"); } } } catch (Exception e) { @@ -332,7 +325,7 @@ public void deleteFile() throws IOException { } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { - LOG.info("HY: deleteFile failed"); + LOG.info("Failed to delete file"); e.printStackTrace(); } } @@ -349,7 +342,6 @@ public synchronized Optional> commit() throws BlockWriteException { if (!metadata.isCommitted()) { commitPartitions(); metadata.commitBlock(); - LOG.info("HY: block and metadata commit for {}", id); } final List> partitionMetadataList = metadata.getPartitionMetadataList(); final Map partitionSizes = new HashMap<>(partitionMetadataList.size()); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index e90ac9567c..a833100bfd 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -187,11 +187,6 @@ public Iterable> readPartitions(final KeyRange keyRang final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - final NonSerializedPartition deserializePartition = DataUtil.deserializePartition( - partitionBytes.length, serializer, key, - new ByteArrayInputStream(partitionBytes)); - deserializePartition.getData().forEach(data -> - LOG.info("deser {}", data)); } else { // Have to skip this partition. skipBytes(fileStream, partitionMetadata.getPartitionSize()); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index d7c2afb67d..b829cf1bcb 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -75,7 +75,7 @@ public void deleteMetadata() throws IOException { try { fs.delete(metaFilePath, true).get().syncDir(); } catch (Exception e) { - LOG.info("HY: metadata deletion failed"); + LOG.info("Metadata deletion failed"); e.printStackTrace(); } } @@ -86,7 +86,6 @@ public void deleteMetadata() throws IOException { */ @Override public synchronized void commitBlock() throws IOException { - LOG.info("HY: metadata commit for block {}", metaFilePath); final Iterable> partitionMetadataItr = getPartitionMetadataList(); try { CrailBufferedOutputStream metaFileOutputstream = @@ -101,7 +100,7 @@ public synchronized void commitBlock() throws IOException { } metaFileOutputstream.close(); } catch (Exception e) { - LOG.info("HY: CrailBufferedOutputStream exception occurred"); + LOG.info("Error while writing meta data"); e.printStackTrace(); } setCommitted(true); @@ -130,7 +129,6 @@ public static CrailFileMetadata create(final String */ public static CrailFileMetadata open(final String metaFilePath, final CrailStore fs) throws IOException { - LOG.info("HY: metafilePath {}", metaFilePath); final List> partitionMetadataList = new ArrayList<>(); try { CrailBufferedInputStream dataInputStream = fs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); @@ -149,7 +147,7 @@ public static CrailFileMetadata open(final String me partitionMetadataList.add(partitionMetadata); } } catch (Exception e) { - throw new IOException("HY: File " + metaFilePath + " does not exist!"); + throw new IOException("Metadata " + metaFilePath + " does not exist!"); } return new CrailFileMetadata<>(metaFilePath, partitionMetadataList, fs); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index f46654a4e5..36c0ab3833 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -55,7 +55,7 @@ public final class CrailFileStore extends AbstractBlockStore implements RemoteFi /** * Constructor. * - * @param volumeDirectory the CrailStore directory which will contain the files. + * @param volumeDirectory the CrailStore directory where we contain the files. * @param jobId the job id. * @param serializerManager the serializer manager. * @throws Exception for any error occurred while trying to set Crail requirements. diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java index 10a807a8ab..02e1263b28 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java @@ -84,7 +84,6 @@ public final class BlockOutputWriter implements OutputWriter { public void write(final Object element) { if (nonDummyBlock) { blockToWrite.write(partitioner.partition(element), element); - //LOG.info("HY: class of block written: {}", blockToWrite.getClass()); final DedicatedKeyPerElement dedicatedKeyPerElement = partitioner.getClass().getAnnotation(DedicatedKeyPerElement.class); if (dedicatedKeyPerElement != null) { From a1ae1bf2b4b387d5eaee0dd140a4a97af408edcf Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 2 Jun 2019 12:24:47 +0900 Subject: [PATCH 185/235] comments trimmed --- .../nemo/runtime/executor/data/DataUtil.java | 7 ------- .../runtime/executor/data/block/FileBlock.java | 18 +++++++++--------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index a6bcca3eb1..5650c75d4a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -92,15 +92,9 @@ public static NonSerializedPartition deserializePartiti final InputStreamIterator iterator = new InputStreamIterator(Collections.singletonList(limitedInputStream).iterator(), serializer); iterator.forEachRemaining(deserializedData::add); - //iterator.forEachRemaining(data -> LOG.info("iterator check")); - deserializedData.forEach(data -> LOG.info("inside DataUtil: {} {}", key, data)); return new NonSerializedPartition(key, deserializedData, iterator.getNumSerializedBytes(), iterator.getNumEncodedBytes()); } - catch(Exception e){ - e.printStackTrace(); - return null; - } } /** @@ -267,7 +261,6 @@ public boolean hasNext() { hasNext = true; return true; } catch (final IOException e) { - LOG.info("IOException", e); // IOException from decoder indicates EOF event. numSerializedBytes += serializedCountingStream.getCount(); numEncodedBytes += encodedCountingStream.getCount(); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index a833100bfd..cb04980860 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -81,7 +81,7 @@ public FileBlock(final String blockId, * @throws IOException if fail to write. */ private void writeToFile(final Iterable> serializedPartitions) - throws IOException { + throws IOException { try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { for (final SerializedPartition serializedPartition : serializedPartitions) { // Reserve a partition write and get the metadata. @@ -128,13 +128,13 @@ public void write(final K key, */ @Override public void writePartitions(final Iterable> partitions) - throws BlockWriteException { + throws BlockWriteException { if (metadata.isCommitted()) { throw new BlockWriteException(new Throwable("The partition is already committed!")); } else { try { final Iterable> convertedPartitions = - DataUtil.convertToSerPartitions(serializer, partitions); + DataUtil.convertToSerPartitions(serializer, partitions); writeSerializedPartitions(convertedPartitions); } catch (final IOException e) { throw new BlockWriteException(e); @@ -151,7 +151,7 @@ public void writePartitions(final Iterable> partitions */ @Override public void writeSerializedPartitions(final Iterable> partitions) - throws BlockWriteException { + throws BlockWriteException { if (metadata.isCommitted()) { throw new BlockWriteException(new Throwable("The partition is already committed!")); } else { @@ -195,9 +195,9 @@ public Iterable> readPartitions(final KeyRange keyRang } for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { final NonSerializedPartition deserializePartition = - DataUtil.deserializePartition( - partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), - new ByteArrayInputStream(partitionKeyBytes.right())); + DataUtil.deserializePartition( + partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), + new ByteArrayInputStream(partitionKeyBytes.right())); deserializedPartitions.add(deserializePartition); } } catch (final IOException e) { @@ -235,7 +235,7 @@ public Iterable> readSerializedPartitions(final KeyRange throw new IOException("The read data size does not match with the partition size."); } partitionsInRange.add(new SerializedPartition<>( - key, serializedData, serializedData.length)); + key, serializedData, serializedData.length)); } else { // Have to skip this partition. skipBytes(fileStream, partitionmetadata.getPartitionSize()); @@ -323,7 +323,7 @@ public synchronized Optional> commit() throws BlockWriteException { final long partitionSize = partitionMetadata.getPartitionSize(); if (partitionSizes.containsKey(key)) { partitionSizes.compute(key, - (existingKey, existingValue) -> existingValue + partitionSize); + (existingKey, existingValue) -> existingValue + partitionSize); } else { partitionSizes.put(key, partitionSize); } From 581006cc1c217929eff44ef3df2bf2e1698e5ff3 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 2 Jun 2019 12:28:10 +0900 Subject: [PATCH 186/235] comments trimmed --- .../java/org/apache/nemo/runtime/executor/data/DataUtil.java | 1 - .../nemo/runtime/executor/data/stores/LocalFileStore.java | 4 +--- .../nemo/runtime/executor/datatransfer/BlockOutputWriter.java | 3 ++- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java index 5650c75d4a..80e83df4e2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/DataUtil.java @@ -29,7 +29,6 @@ import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import sun.reflect.annotation.ExceptionProxy; import java.io.*; import java.util.*; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java index 477fd9be45..62758aa3f8 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/LocalFileStore.java @@ -26,10 +26,7 @@ import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.apache.nemo.runtime.executor.data.metadata.LocalFileMetadata; import org.apache.nemo.runtime.executor.data.block.FileBlock; -import org.apache.nemo.runtime.executor.task.TaskExecutor; import org.apache.reef.tang.annotations.Parameter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; @@ -41,6 +38,7 @@ @ThreadSafe public final class LocalFileStore extends LocalBlockStore { private final String fileDirectory; + /** * Constructor. * diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java index 02e1263b28..4b85087fd7 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/datatransfer/BlockOutputWriter.java @@ -83,7 +83,8 @@ public final class BlockOutputWriter implements OutputWriter { @Override public void write(final Object element) { if (nonDummyBlock) { - blockToWrite.write(partitioner.partition(element), element); + blockToWrite.write(partitioner.partition(element), element); + final DedicatedKeyPerElement dedicatedKeyPerElement = partitioner.getClass().getAnnotation(DedicatedKeyPerElement.class); if (dedicatedKeyPerElement != null) { From 1bb1d9bbce421666993d17a380d5a3f659ae4716 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Sun, 2 Jun 2019 12:30:52 +0900 Subject: [PATCH 187/235] reverted --- .../runtime/executor/data/partition/SerializedPartition.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java index 06fb5c97b7..71541c2b2a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/partition/SerializedPartition.java @@ -116,7 +116,8 @@ public void commit() throws IOException { // inner buffer directly, which can be an unfinished(not flushed) buffer. wrappedStream.close(); this.serializedData = bytesOutputStream.getBufDirectly(); - this.length = bytesOutputStream.size(); + + this.length = bytesOutputStream.getCount(); this.committed = true; } } From 284fa861c729aafb922fdd102f318c94f33d42c4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 3 Jun 2019 18:12:12 +0900 Subject: [PATCH 188/235] refactoring --- .../java/org/apache/nemo/driver/NemoDriver.java | 13 +++++-------- .../executor/data/metadata/CrailFileMetadata.java | 2 +- .../executor/data/metadata/FileMetadata.java | 2 +- .../executor/data/stores/CrailFileStore.java | 9 +++------ 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index b4387722a1..5d5a337528 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -56,7 +56,6 @@ import org.slf4j.LoggerFactory; import javax.inject.Inject; -import java.io.IOException; import java.io.Serializable; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -154,19 +153,17 @@ public final class StartHandler implements EventHandler { @Override public void onNext(final StartTime startTime) { setUpLogger(); - boolean baseDirExists; try { conf = new CrailConfiguration(); fs = CrailStore.newInstance(conf); try{ - fs.delete("/tmp_crail", true).get().syncDir(); - } catch(Exception e){ - LOG.info("failed to delete /tmp_crail"); + fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + } catch(Exception e ) { + LOG.info("tmp_crail already exists"); } - fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - fs.create("/tmp_crail/files", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); + fs.create("/tmp_crail/"+jobId, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); } - catch(Exception e){ + catch(Exception e) { LOG.info("Failed to create Crail directory"); e.printStackTrace(); } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index b829cf1bcb..543bc2806a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -71,7 +71,7 @@ private CrailFileMetadata(final String metaFilePath, * @see FileMetadata#deleteMetadata() */ @Override - public void deleteMetadata() throws IOException { + public void deleteMetadata() { try { fs.delete(metaFilePath, true).get().syncDir(); } catch (Exception e) { diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java index 2f95871f09..59844df830 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java @@ -90,7 +90,7 @@ public final List> getPartitionMetadataList() throws IOExce * * @throws IOException if fail to delete. */ - public abstract void deleteMetadata() throws IOException; + public abstract void deleteMetadata() throws IOException, Exception; /** * Notifies that all writes are finished for the block corresponding to this metadata. diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 36c0ab3833..1ddddd0fb1 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -29,8 +29,6 @@ import org.apache.nemo.runtime.executor.data.metadata.CrailFileMetadata; import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.apache.reef.tang.annotations.Parameter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; @@ -47,10 +45,9 @@ */ @ThreadSafe public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { - private static final Logger LOG = LoggerFactory.getLogger(CrailFileStore.class.getName()); private final String fileDirectory; - private CrailConfiguration conf = null; - private CrailStore fs = null; + private CrailConfiguration conf; + private CrailStore fs; /** * Constructor. @@ -67,7 +64,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); - this.fileDirectory = volumeDirectory + "/files"; + this.fileDirectory = volumeDirectory + jobId; } @Override From fa43b9a026e726d99dac18dd75f4e0c4bc19ab7e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 3 Jun 2019 18:12:54 +0900 Subject: [PATCH 189/235] crail-assembly deletion -> build failure --- pom.xml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index 50624b42be..b3ec4dd5ae 100644 --- a/pom.xml +++ b/pom.xml @@ -114,16 +114,11 @@ under the License. ${powermock.version} test - - org.apache.crail - crail-assembly - 1.2-incubating-SNAPSHOT + + org.apache.crail + crail-client + 1.2-incubating-SNAPSHOT - - org.apache.crail - crail-client - 1.2-incubating-SNAPSHOT - From 76f34cae16231867496dd08f49bb04e4b3193c18 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 3 Jun 2019 18:18:33 +0900 Subject: [PATCH 190/235] ?? --- .../nemo/runtime/executor/data/metadata/FileMetadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java index 59844df830..2f95871f09 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/FileMetadata.java @@ -90,7 +90,7 @@ public final List> getPartitionMetadataList() throws IOExce * * @throws IOException if fail to delete. */ - public abstract void deleteMetadata() throws IOException, Exception; + public abstract void deleteMetadata() throws IOException; /** * Notifies that all writes are finished for the block corresponding to this metadata. From 108466e830404c3e3830c770160698eab1001019 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 3 Jun 2019 18:33:06 +0900 Subject: [PATCH 191/235] Crail API changed --- .../driver/src/main/java/org/apache/nemo/driver/NemoDriver.java | 2 +- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 5d5a337528..1279272312 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -154,7 +154,7 @@ public final class StartHandler implements EventHandler { public void onNext(final StartTime startTime) { setUpLogger(); try { - conf = new CrailConfiguration(); + conf = CrailConfiguration.createConfigurationFromFile(); fs = CrailStore.newInstance(conf); try{ fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 1ddddd0fb1..90f8368a9c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -62,7 +62,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - this.conf = new CrailConfiguration(); + this.conf = CrailConfiguration.createConfigurationFromFile(); this.fs = CrailStore.newInstance(conf); this.fileDirectory = volumeDirectory + jobId; } From 185b32edaaaee5409f1bf023e10f73fbaf956f11 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:20:23 +0900 Subject: [PATCH 192/235] crail directory just in the root since it is just an ephemeral data... --- .../java/org/apache/nemo/conf/JobConf.java | 2 +- .../org/apache/nemo/driver/NemoDriver.java | 20 ------------------- .../executor/data/stores/CrailFileStore.java | 3 ++- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 69995a216b..709bfa51c1 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/tmp_crail") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/") public final class CrailVolumeDirectory implements Name { } diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 1279272312..b8d5885866 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -18,8 +18,6 @@ */ package org.apache.nemo.driver; -import org.apache.crail.*; -import org.apache.crail.conf.CrailConfiguration; import org.apache.nemo.common.ir.IdManager; import org.apache.nemo.compiler.optimizer.pass.compiletime.annotating.ResourceSitePass; import org.apache.nemo.conf.JobConf; @@ -88,10 +86,6 @@ public final class NemoDriver { // Client for sending log messages private final RemoteClientMessageLoggingHandler handler; - //Crail - CrailConfiguration conf; - CrailStore fs; - @Inject private NemoDriver(final UserApplicationRunner userApplicationRunner, final RuntimeMaster runtimeMaster, @@ -153,20 +147,6 @@ public final class StartHandler implements EventHandler { @Override public void onNext(final StartTime startTime) { setUpLogger(); - try { - conf = CrailConfiguration.createConfigurationFromFile(); - fs = CrailStore.newInstance(conf); - try{ - fs.create("/tmp_crail", CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - } catch(Exception e ) { - LOG.info("tmp_crail already exists"); - } - fs.create("/tmp_crail/"+jobId, CrailNodeType.DIRECTORY, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true).get().syncDir(); - } - catch(Exception e) { - LOG.info("Failed to create Crail directory"); - e.printStackTrace(); - } runtimeMaster.requestContainer(resourceSpecificationString); } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 90f8368a9c..643974f0b6 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -62,7 +62,8 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - this.conf = CrailConfiguration.createConfigurationFromFile(); + String base = System.getenv("CRAIL_HOME"); + this.conf = CrailConfiguration.createConfigurationFromFile(base+"/conf/crail-site.conf"); this.fs = CrailStore.newInstance(conf); this.fileDirectory = volumeDirectory + jobId; } From a25eef27aefc0e98ff2583a515b3e8323a0885cd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:24:46 +0900 Subject: [PATCH 193/235] crail API --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 643974f0b6..90f8368a9c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -62,8 +62,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - String base = System.getenv("CRAIL_HOME"); - this.conf = CrailConfiguration.createConfigurationFromFile(base+"/conf/crail-site.conf"); + this.conf = CrailConfiguration.createConfigurationFromFile(); this.fs = CrailStore.newInstance(conf); this.fileDirectory = volumeDirectory + jobId; } From d57c442d90de3f3180716fa7a69d49af5dc76ed4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:38:25 +0900 Subject: [PATCH 194/235] dependency --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pom.xml b/pom.xml index b3ec4dd5ae..cfd6d69eae 100644 --- a/pom.xml +++ b/pom.xml @@ -114,6 +114,11 @@ under the License. ${powermock.version} test + + org.apache.crail + crail-assembly + 1.2-incubating-SNAPSHOT + org.apache.crail crail-client From c6ae2efb09e3589c71fb8abdb2c6c229ace8de79 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:43:49 +0900 Subject: [PATCH 195/235] all dependencies --- pom.xml | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cfd6d69eae..92c556f2f2 100644 --- a/pom.xml +++ b/pom.xml @@ -116,7 +116,52 @@ under the License. org.apache.crail - crail-assembly + crail-storage + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-storage-narpc + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-storage-rdma + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-storage-nvmf + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-rpc + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-rpc-narpc + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-parent + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-hdfs + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-namenode + 1.2-incubating-SNAPSHOT + + + org.apache.crail + crail-rpc 1.2-incubating-SNAPSHOT From 8d5e21a7f7e6c106b9638feaf52724ae4adc949d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:44:42 +0900 Subject: [PATCH 196/235] dependency --- pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pom.xml b/pom.xml index 92c556f2f2..44452c5e2e 100644 --- a/pom.xml +++ b/pom.xml @@ -144,11 +144,6 @@ under the License. crail-rpc-narpc 1.2-incubating-SNAPSHOT - - org.apache.crail - crail-parent - 1.2-incubating-SNAPSHOT - org.apache.crail crail-hdfs From 94e1928a53d36bbee38fe9d25c63b7566778701f Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 11:51:54 +0900 Subject: [PATCH 197/235] path edit --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 90f8368a9c..c0afdea50b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -64,7 +64,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri super(serializerManager); this.conf = CrailConfiguration.createConfigurationFromFile(); this.fs = CrailStore.newInstance(conf); - this.fileDirectory = volumeDirectory + jobId; + this.fileDirectory = volumeDirectory; } @Override From e04d02b046f69b48e82639413013504939e5ec68 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 12:04:54 +0900 Subject: [PATCH 198/235] path edit --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 709bfa51c1..92863ff6e4 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -79,7 +79,7 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "/") + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "") public final class CrailVolumeDirectory implements Name { } From 76c0e51acdfbd0467af471d219f3b63f12ef9006 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:15:01 +0900 Subject: [PATCH 199/235] refactoring --- beam_test_executor_resources.json | 12 ------------ bin/run_beam.sh | 2 +- .../runtime/executor/data/BlockManagerWorker.java | 1 - 3 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 beam_test_executor_resources.json diff --git a/beam_test_executor_resources.json b/beam_test_executor_resources.json deleted file mode 100644 index 91f7aee668..0000000000 --- a/beam_test_executor_resources.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "type": "Transient", - "memory_mb": 512, - "capacity": 4 - }, - { - "type": "Reserved", - "memory_mb": 512, - "capacity": 4 - } -] diff --git a/bin/run_beam.sh b/bin/run_beam.sh index c2dcf6611a..017a3e0199 100755 --- a/bin/run_beam.sh +++ b/bin/run_beam.sh @@ -19,5 +19,5 @@ java -Dlog4j.configuration=file://`pwd`/log4j.properties -cp examples/beam/target/nemo-examples-beam-$(mvn -q \ -Dexec.executable=echo -Dexec.args='${project.version}' \ - --non-recursive exec:exec)-shaded.jar:$CRAIL_JAR:`yarn classpath` org.apache.nemo.client.JobLauncher "$@" + --non-recursive exec:exec)-shaded.jar:`yarn classpath` org.apache.nemo.client.JobLauncher "$@" diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index d35402c84b..3e99e7d155 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -424,7 +424,6 @@ private CompletableFuture getDataFromLocalBlock( numSerializedBytes += partition.getNumSerializedBytes(); numEncodedBytes += partition.getNumEncodedBytes(); } - LOG.info("HY: numSerializedBytes {}, numEncodedBytes {}", numSerializedBytes, numEncodedBytes); return CompletableFuture.completedFuture(DataUtil.IteratorWithNumBytes.of(innerIterator, numSerializedBytes, numEncodedBytes)); } catch (final DataUtil.IteratorWithNumBytes.NumBytesNotSupportedException e) { From a6a15f4ecfdb035f8d68a4018b7a1356f90690eb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:16:41 +0900 Subject: [PATCH 200/235] refactoring --- bin/run_beam.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/run_beam.sh b/bin/run_beam.sh index 017a3e0199..cbd082c7a1 100755 --- a/bin/run_beam.sh +++ b/bin/run_beam.sh @@ -20,4 +20,3 @@ java -Dlog4j.configuration=file://`pwd`/log4j.properties -cp examples/beam/target/nemo-examples-beam-$(mvn -q \ -Dexec.executable=echo -Dexec.args='${project.version}' \ --non-recursive exec:exec)-shaded.jar:`yarn classpath` org.apache.nemo.client.JobLauncher "$@" - From 5cd3690b6b7ca3eac3f9fd93c080de02b21507d1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:30:04 +0900 Subject: [PATCH 201/235] refactoring --- .../java/org/apache/nemo/conf/JobConf.java | 3 +- .../nemo/examples/beam/tpch/Schemas.java | 363 ------------------ .../apache/nemo/examples/beam/tpch/Tpch.java | 196 ---------- 3 files changed, 2 insertions(+), 560 deletions(-) delete mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java delete mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 92863ff6e4..ca16edd58c 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -78,8 +78,9 @@ public final class GlusterVolumeDirectory implements Name { /** * Directory points the CrailFileSystem to store files. + * */ - @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "") + @NamedParameter(doc = "Root Directory of CrailFS volume", short_name = "crail_dir", default_value = "") public final class CrailVolumeDirectory implements Name { } diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java deleted file mode 100644 index f9dbd285f6..0000000000 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nemo.examples.beam.tpch; - -import com.google.common.collect.ImmutableMap; -import org.apache.beam.sdk.schemas.Schema; - -/** - * A simple SQL application. - * (Copied and adapted from https://github.com/apache/beam/pull/6240) - */ -public final class Schemas { - /** - * Private. - */ - private Schemas() { - } - - public static final ImmutableMap COLUMN_PREFIX = ImmutableMap.builder() - .put("lineitem", "l_") - .put("customer", "c_") - .put("supplier", "s_") - .put("partsupp", "ps_") - .put("part", "p_") - .put("orders", "o_") - .put("nation", "n_") - .put("region", "r_") - .build(); - - public static final Schema STORE_SALES_SCHEMA = - Schema.builder() - .addNullableField("ss_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("ss_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("ss_item_sk", Schema.FieldType.INT64) - .addNullableField("ss_customer_sk", Schema.FieldType.STRING) - .addNullableField("ss_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ss_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ss_addr_sk", Schema.FieldType.INT64) - .addNullableField("ss_store_sk", Schema.FieldType.INT64) - .addNullableField("ss_promo_sk", Schema.FieldType.INT64) - .addNullableField("ss_ticket_number", Schema.FieldType.INT64) - .addNullableField("ss_quantity", Schema.FieldType.INT64) - .addNullableField("ss_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ss_list_price", Schema.FieldType.FLOAT) - .addNullableField("ss_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("ss_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("ss_net_paid", Schema.FieldType.FLOAT) - .addNullableField("ss_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("ss_net_profit", Schema.FieldType.FLOAT) - .build(); - - public static final Schema DATE_DIM_SCHEMA = - Schema.builder() - .addNullableField("d_date_sk", Schema.FieldType.INT64) - .addNullableField("d_date_id", Schema.FieldType.STRING) - .addNullableField("d_date", Schema.FieldType.STRING) - .addNullableField("d_month_seq", Schema.FieldType.INT64) - .addNullableField("d_week_seq", Schema.FieldType.INT64) - .addNullableField("d_quarter_seq", Schema.FieldType.INT64) - .addNullableField("d_year", Schema.FieldType.INT64) - .addNullableField("d_dow", Schema.FieldType.INT64) - .addNullableField("d_moy", Schema.FieldType.INT64) - .addNullableField("d_dom", Schema.FieldType.INT64) - .addNullableField("d_qoy", Schema.FieldType.INT64) - .addNullableField("d_fy_year", Schema.FieldType.INT64) - .addNullableField("d_fy_quarter_seq", Schema.FieldType.INT64) - .addNullableField("d_fy_week_seq", Schema.FieldType.INT64) - .addNullableField("d_day_name", Schema.FieldType.STRING) - .addNullableField("d_quarter_name", Schema.FieldType.STRING) - .addNullableField("d_holiday", Schema.FieldType.STRING) - .addNullableField("d_weekend", Schema.FieldType.STRING) - .addNullableField("d_following_holiday", Schema.FieldType.STRING) - .addNullableField("d_first_dom", Schema.FieldType.INT64) - .addNullableField("d_last_dom", Schema.FieldType.INT64) - .addNullableField("d_same_day_ly", Schema.FieldType.INT64) - .addNullableField("d_same_day_lq", Schema.FieldType.INT64) - .addNullableField("d_current_day", Schema.FieldType.STRING) - .addNullableField("d_current_week", Schema.FieldType.STRING) - .addNullableField("d_current_month", Schema.FieldType.STRING) - .addNullableField("d_current_quarter", Schema.FieldType.STRING) - .addNullableField("d_current_year", Schema.FieldType.STRING) - .build(); - - public static final Schema ITEM_SCHEMA = - Schema.builder() - .addNullableField("i_item_sk", Schema.FieldType.INT64) - .addNullableField("i_item_id", Schema.FieldType.STRING) - .addNullableField("i_rec_start_date", Schema.FieldType.DATETIME) - .addNullableField("i_rec_end_date", Schema.FieldType.DATETIME) - .addNullableField("i_item_desc", Schema.FieldType.STRING) - .addNullableField("i_current_price", Schema.FieldType.FLOAT) - .addNullableField("i_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("i_brand_id", Schema.FieldType.INT64) - .addNullableField("i_brand", Schema.FieldType.STRING) - .addNullableField("i_class_id", Schema.FieldType.INT64) - .addNullableField("i_class", Schema.FieldType.STRING) - .addNullableField("i_category_id", Schema.FieldType.INT64) - .addNullableField("i_category", Schema.FieldType.STRING) - .addNullableField("i_manufact_id", Schema.FieldType.INT64) - .addNullableField("i_manufact", Schema.FieldType.STRING) - .addNullableField("i_size", Schema.FieldType.STRING) - .addNullableField("i_formulation", Schema.FieldType.STRING) - .addNullableField("i_color", Schema.FieldType.STRING) - .addNullableField("i_units", Schema.FieldType.STRING) - .addNullableField("i_container", Schema.FieldType.STRING) - .addNullableField("i_manager_id", Schema.FieldType.INT64) - .addNullableField("i_product_name", Schema.FieldType.STRING) - .build(); - - public static final Schema INVENTORY_SCHEMA = - Schema.builder() - .addNullableField("inv_date_sk", Schema.FieldType.INT64) - .addNullableField("inv_item_sk", Schema.FieldType.INT64) - .addNullableField("inv_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("inv_quantity_on_hand", Schema.FieldType.INT64) - .build(); - - public static final Schema CATALOG_SALES_SCHEMA = - Schema.builder() - .addNullableField("cs_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("cs_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_date_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_customer_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_addr_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_customer_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_addr_sk", Schema.FieldType.INT64) - .addNullableField("cs_call_center_sk", Schema.FieldType.INT64) - .addNullableField("cs_catalog_page_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_mode_sk", Schema.FieldType.INT64) - .addNullableField("cs_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("cs_item_sk", Schema.FieldType.INT64) - .addNullableField("cs_promo_sk", Schema.FieldType.INT64) - .addNullableField("cs_order_number", Schema.FieldType.INT64) - .addNullableField("cs_quantity", Schema.FieldType.INT64) - .addNullableField("cs_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_list_price", Schema.FieldType.FLOAT) - .addNullableField("cs_sales_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_ship_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_ship", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_net_profit", Schema.FieldType.FLOAT) - .build(); - - public static final Schema ORDER_SCHEMA = - Schema.builder() - .addInt64Field("o_orderkey") - .addInt64Field("o_custkey") - .addStringField("o_orderstatus") - .addFloatField("o_totalprice") - .addStringField("o_orderdate") - .addStringField("o_orderpriority") - .addStringField("o_clerk") - .addInt64Field("o_shippriority") - .addStringField("o_comment") - .build(); - - public static final Schema CUSTOMER_SCHEMA = - Schema.builder() - .addInt64Field("c_custkey") - .addStringField("c_name") - .addStringField("c_address") - .addInt64Field("c_nationkey") - .addStringField("c_phone") - .addFloatField("c_acctbal") - .addStringField("c_mktsegment") - .addStringField("c_comment") - .build(); - - public static final Schema CUSTOMER_DS_SCHEMA = - Schema.builder() - .addNullableField("c_customer_sk", Schema.FieldType.INT64) - .addNullableField("c_customer_id", Schema.FieldType.STRING) - .addNullableField("c_current_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("c_current_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("c_current_addr_sk", Schema.FieldType.INT64) - .addNullableField("c_first_shipto_date_sk", Schema.FieldType.INT64) - .addNullableField("c_first_sales_date_sk", Schema.FieldType.INT64) - .addNullableField("c_salutation", Schema.FieldType.STRING) - .addNullableField("c_first_name", Schema.FieldType.STRING) - .addNullableField("c_last_name", Schema.FieldType.STRING) - .addNullableField("c_preferred_cust_flag", Schema.FieldType.STRING) - .addNullableField("c_birth_day", Schema.FieldType.INT64) - .addNullableField("c_birth_month", Schema.FieldType.INT64) - .addNullableField("c_birth_year", Schema.FieldType.INT64) - .addNullableField("c_birth_country", Schema.FieldType.STRING) - .addNullableField("c_login", Schema.FieldType.STRING) - .addNullableField("c_email_address", Schema.FieldType.STRING) - .addNullableField("c_last_review_date", Schema.FieldType.STRING) - .build(); - - public static final Schema LINEITEM_SCHEMA = - Schema.builder() - .addInt64Field("l_orderkey") - .addInt64Field("l_partkey") - .addInt64Field("l_suppkey") - .addInt64Field("l_linenumber") - .addFloatField("l_quantity") - .addFloatField("l_extendedprice") - .addFloatField("l_discount") - .addFloatField("l_tax") - .addStringField("l_returnflag") - .addStringField("l_linestatus") - .addStringField("l_shipdate") - .addStringField("l_commitdate") - .addStringField("l_receiptdate") - .addStringField("l_shipinstruct") - .addStringField("l_shipmode") - .addStringField("l_comment") - .build(); - - public static final Schema PARTSUPP_SCHEMA = - Schema.builder() - .addInt64Field("ps_partkey") - .addInt64Field("ps_suppkey") - .addInt64Field("ps_availqty") - .addFloatField("ps_supplycost") - .addStringField("ps_comment") - .build(); - - public static final Schema REGION_SCHEMA = - Schema.builder() - .addInt64Field("r_regionkey") - .addStringField("r_name") - .addStringField("r_comment") - .build(); - - public static final Schema SUPPLIER_SCHEMA = - Schema.builder() - .addInt64Field("s_suppkey") - .addStringField("s_name") - .addStringField("s_address") - .addInt64Field("s_nationkey") - .addStringField("s_phone") - .addFloatField("s_acctbal") - .addStringField("s_comment") - .build(); - - public static final Schema PART_SCHEMA = - Schema.builder() - .addInt64Field("p_partkey") - .addStringField("p_name") - .addStringField("p_mfgr") - .addStringField("p_brand") - .addStringField("p_type") - .addInt64Field("p_size") - .addStringField("p_container") - .addFloatField("p_retailprice") - .addStringField("p_comment") - .build(); - - public static final Schema NATION_SCHEMA = - Schema.builder() - .addInt64Field("n_nationkey") - .addStringField("n_name") - .addInt64Field("n_regionkey") - .addStringField("n_comment") - .build(); - - public static final Schema PROMOTION_SCHEMA = - Schema.builder() - .addNullableField("p_promo_sk", Schema.FieldType.INT64) - .addNullableField("p_promo_id", Schema.FieldType.STRING) - .addNullableField("p_start_date_sk", Schema.FieldType.INT64) - .addNullableField("p_end_date_sk", Schema.FieldType.INT64) - .addNullableField("p_item_sk", Schema.FieldType.INT64) - .addNullableField("p_cost", Schema.FieldType.FLOAT) - .addNullableField("p_response_target", Schema.FieldType.INT64) - .addNullableField("p_promo_name", Schema.FieldType.STRING) - .addNullableField("p_channel_dmail", Schema.FieldType.STRING) - .addNullableField("p_channel_email", Schema.FieldType.STRING) - .addNullableField("p_channel_catalog", Schema.FieldType.STRING) - .addNullableField("p_channel_tv", Schema.FieldType.STRING) - .addNullableField("p_channel_radio", Schema.FieldType.STRING) - .addNullableField("p_channel_press", Schema.FieldType.STRING) - .addNullableField("p_channel_event", Schema.FieldType.STRING) - .addNullableField("p_channel_demo", Schema.FieldType.STRING) - .addNullableField("p_channel_details", Schema.FieldType.STRING) - .addNullableField("p_purpose", Schema.FieldType.STRING) - .addNullableField("p_discount_active", Schema.FieldType.STRING) - .build(); - - public static final Schema CUSTOMER_DEMOGRAPHIC_SCHEMA = - Schema.builder() - .addNullableField("cd_demo_sk", Schema.FieldType.INT64) - .addNullableField("cd_gender", Schema.FieldType.STRING) - .addNullableField("cd_marital_status", Schema.FieldType.STRING) - .addNullableField("cd_education_status", Schema.FieldType.STRING) - .addNullableField("cd_purchase_estimate", Schema.FieldType.INT64) - .addNullableField("cd_credit_rating", Schema.FieldType.STRING) - .addNullableField("cd_dep_count", Schema.FieldType.INT64) - .addNullableField("cd_dep_employed_count", Schema.FieldType.INT64) - .addNullableField("cd_dep_college_count", Schema.FieldType.INT64) - .build(); - - public static final Schema WEB_SALES_SCHEMA = - Schema.builder() - .addNullableField("ws_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("ws_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_date_sk", Schema.FieldType.INT64) - .addNullableField("ws_item_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_customer_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_addr_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_customer_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_addr_sk", Schema.FieldType.INT64) - .addNullableField("ws_web_page_sk", Schema.FieldType.INT64) - .addNullableField("ws_web_site_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_mode_sk", Schema.FieldType.INT64) - .addNullableField("ws_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("ws_promo_sk", Schema.FieldType.INT64) - .addNullableField("ws_order_number", Schema.FieldType.INT64) - .addNullableField("ws_quantity", Schema.FieldType.INT64) - .addNullableField("ws_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_list_price", Schema.FieldType.FLOAT) - .addNullableField("ws_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_ship_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_ship", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_net_profit", Schema.FieldType.FLOAT) - .build(); -} diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java deleted file mode 100644 index 6134df334e..0000000000 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nemo.examples.beam.tpch; - -import com.google.common.collect.ImmutableMap; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.extensions.sql.SqlTransform; -import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.*; -import org.apache.commons.csv.CSVFormat; -import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; -import org.apache.nemo.compiler.frontend.beam.NemoRunner; -import org.apache.nemo.examples.beam.GenericSourceSink; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.*; -import java.util.stream.Stream; - -import static org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.beamRow2CsvLine; - -/** - * A simple SQL application. - * (Copied and adapted from https://github.com/apache/beam/pull/6240) - */ -public final class Tpch { - private static final Logger LOG = LoggerFactory.getLogger(Tpch.class.getName()); - - /** - * Private Constructor. - */ - private Tpch() { - } - - /** - * Row csv formats. - */ - static class RowToCsv extends PTransform, PCollection> implements Serializable { - - private final CSVFormat csvFormat; - - RowToCsv(final CSVFormat csvFormat) { - this.csvFormat = csvFormat; - } - - public CSVFormat getCsvFormat() { - return csvFormat; - } - - @Override - public PCollection expand(final PCollection input) { - return input.apply( - "rowToCsv", - MapElements.into(TypeDescriptors.strings()).via(row -> beamRow2CsvLine(row, csvFormat))); - } - } - - private static PCollectionTuple getHTables(final Pipeline pipeline, - final CSVFormat csvFormat, - final String inputDirectory, - final String query) { - final ImmutableMap hSchemas = ImmutableMap.builder() - .put("lineitem", Schemas.LINEITEM_SCHEMA) - .put("customer", Schemas.CUSTOMER_SCHEMA) - .put("orders", Schemas.ORDER_SCHEMA) - - .put("supplier", Schemas.SUPPLIER_SCHEMA) - .put("nation", Schemas.NATION_SCHEMA) - .put("region", Schemas.REGION_SCHEMA) - - .put("part", Schemas.PART_SCHEMA) - .put("partsupp", Schemas.PARTSUPP_SCHEMA) - /* - .put("store_sales", Schemas.STORE_SALES_SCHEMA) - .put("catalog_sales", Schemas.CATALOG_SALES_SCHEMA) - .put("item", Schemas.ITEM_SCHEMA) - .put("date_dim", Schemas.DATE_DIM_SCHEMA) - .put("promotion", Schemas.PROMOTION_SCHEMA) - .put("customer_demographics", Schemas.CUSTOMER_DEMOGRAPHIC_SCHEMA) - .put("web_sales", Schemas.WEB_SALES_SCHEMA) - .put("inventory", Schemas.INVENTORY_SCHEMA) - */ - .build(); - - PCollectionTuple tables = PCollectionTuple.empty(pipeline); - for (final Map.Entry tableSchema : hSchemas.entrySet()) { - final String tableName = tableSchema.getKey(); - - if (query.contains(tableName)) { - LOG.info("HIT: tablename {}", tableName); - - final String filePattern = inputDirectory + tableSchema.getKey() + ".tbl*"; - final PCollection table = GenericSourceSink.read(pipeline, filePattern) - .apply("StringToRow", new TextTableProvider.CsvToRow(tableSchema.getValue(), csvFormat)) - .setCoder(tableSchema.getValue().getRowCoder()) - .setName(tableSchema.getKey()); - tables = tables.and(new TupleTag<>(tableSchema.getKey()), table); - - LOG.info("FilePattern {} / Tables {}", filePattern, tables); - } - } - return tables; - } - - - /** - * @param args arguments. - */ - public static void main(final String[] args) { - final String queryFilePath = args[0]; - final String inputDirectory = args[1]; - final String outputFilePath = args[2]; - - LOG.info("{} / {} / {}", queryFilePath, inputDirectory, outputFilePath); - - final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class); - options.setRunner(NemoRunner.class); - options.setJobName("TPC-H"); - final Pipeline p = Pipeline.create(options); - - final String queryString = getQueryString(queryFilePath); - // Create tables - final CSVFormat csvFormat = CSVFormat.MYSQL - .withDelimiter('|') - .withNullString("") - .withTrailingDelimiter(); - final PCollectionTuple tables = getHTables(p, csvFormat, inputDirectory, queryString); - - // Run the TPC-H query - final PCollection result = tables.apply(SqlTransform.query(queryString)); - - final PCollection resultToWrite = result.apply(MapElements.into(TypeDescriptors.strings()).via( - new SerializableFunction() { - @Override - public String apply(final Row input) { - System.out.println(input.getValues().toString()); - return input.getValues().toString(); - } - })); - - GenericSourceSink.write(resultToWrite, outputFilePath); - - // Then run - p.run(); - } - - private static String getQueryString(final String queryFilePath) { - final List lines = new ArrayList<>(); - try (final Stream stream = Files.lines(Paths.get(queryFilePath))) { - stream.forEach(lines::add); - } catch (IOException e) { - throw new RuntimeException(e); - } - - System.out.println(lines); - - final StringBuilder sb = new StringBuilder(); - lines.forEach(line -> { - sb.append(" "); - sb.append(line); - }); - - final String concate = sb.toString(); - System.out.println(concate); - final String cleanOne = concate.replaceAll("\n", " "); - System.out.println(cleanOne); - final String cleanTwo = cleanOne.replaceAll("\t", " "); - System.out.println(cleanTwo); - - return cleanTwo; - } -} From 31a3456aaa6ed471c5db9adeed3df9e6c533e782 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:32:18 +0900 Subject: [PATCH 202/235] refactoring --- .../nemo/examples/beam/GenericSourceSink.java | 2 +- .../apache/nemo/examples/beam/WordCountITCase.java | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java index 3ebcb4cef2..2ab09a7f32 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java @@ -42,7 +42,7 @@ * Helper class for handling source/sink in a generic way. * Assumes String-type PCollections. */ -public final class GenericSourceSink { +final class GenericSourceSink { /** * Default Constructor. */ diff --git a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java index 3a46e82b87..e31a8c5a4d 100644 --- a/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java +++ b/examples/beam/src/test/java/org/apache/nemo/examples/beam/WordCountITCase.java @@ -23,9 +23,6 @@ import org.apache.nemo.common.test.ExampleTestArgs; import org.apache.nemo.common.test.ExampleTestUtil; import org.apache.nemo.compiler.optimizer.policy.ConditionalLargeShufflePolicy; -import org.apache.nemo.compiler.optimizer.policy.CrailPolicy; -import org.apache.nemo.compiler.optimizer.policy.DefaultPolicy; -import org.apache.nemo.compiler.optimizer.policy.DisaggregationPolicy; import org.apache.nemo.examples.beam.policy.*; import org.junit.After; import org.junit.Before; @@ -75,7 +72,6 @@ public void test() throws Exception { .build()); } - @Test (timeout = ExampleTestArgs.TIMEOUT) public void testLargeShuffle() throws Exception { JobLauncher.main(builder @@ -131,14 +127,4 @@ public void testSpeculativeExecution() throws Exception { .addOptimizationPolicy(AggressiveSpeculativeCloningPolicyParallelismFive.class.getCanonicalName()) .build()); } - - @Test (timeout = ExampleTestArgs.TIMEOUT) - public void testCrailPolicy() throws Exception{ - JobLauncher.main(builder - .addResourceJson(executorResourceFileName) - .addJobId(WordCountITCase.class.getSimpleName() + " CrailPolicy") - .addMaxTaskAttempt(Integer.MAX_VALUE) - .addOptimizationPolicy(CrailPolicy.class.getCanonicalName()) - .build()); - } } From 23dbd8231e95bf7527b6710b8e3afe6079d17ade Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:46:53 +0900 Subject: [PATCH 203/235] refactoring --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 44452c5e2e..0386a93c22 100644 --- a/pom.xml +++ b/pom.xml @@ -65,7 +65,7 @@ under the License. 2.13.0 2.0.0-beta.5 - 2.19.1 + 3.0.0-M1 4.12 @@ -165,6 +165,7 @@ under the License. 1.2-incubating-SNAPSHOT + From 0d849622e382ff38442a510806cdd94f72ffbb9a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Wed, 5 Jun 2019 15:55:33 +0900 Subject: [PATCH 204/235] refactoring --- .../nemo/runtime/executor/data/stores/RemoteFileStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java index b377800a51..d0e015e815 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java @@ -21,7 +21,7 @@ import org.apache.reef.tang.annotations.DefaultImplementation; /** - * Interface for remote block stores (e.g., GlusterFS, ...). + * Interface for remote block stores (e.g., GlusterFS, CrailFS...). */ @DefaultImplementation(CrailFileStore.class) From cc8827e4d232689d8d74a384d284a7087bfe0dc9 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Fri, 7 Jun 2019 15:46:40 +0900 Subject: [PATCH 205/235] checkstyle --- .../nemo/runtime/executor/data/block/CrailFileBlock.java | 1 + .../nemo/runtime/executor/data/stores/CrailFileStore.java | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 1c1ba15a22..5be4a21beb 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -107,6 +107,7 @@ private void writeToFile(final Iterable> serializedPartit } fileOutputStream.close(); } + /** * Writes an element to non-committed block. * Invariant: This should not be invoked after this block is committed. diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index c0afdea50b..67eda4868b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -148,8 +148,8 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { } catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e) { - e.printStackTrace(); - throw new BlockFetchException(e); + e.printStackTrace(); + throw new BlockFetchException(e); } } From d31946580eeda0c25f735c608b5bade6c3802766 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 15:00:33 +0900 Subject: [PATCH 206/235] option for using CrailFS --- .../java/org/apache/nemo/conf/JobConf.java | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index ca16edd58c..fbc0181f9a 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -77,11 +77,19 @@ public final class GlusterVolumeDirectory implements Name { } /** - * Directory points the CrailFileSystem to store files. - * - */ - @NamedParameter(doc = "Root Directory of CrailFS volume", short_name = "crail_dir", default_value = "") - public final class CrailVolumeDirectory implements Name { + * Directory points the CrailFileStore volume used to store files in remote fashion. + * The directory has to be already created to give it as an option. (else exception is thrown) + */ + @NamedParameter(doc = "Directory points the CrailFS volume", short_name = "crail_dir", default_value = "") + public final class CrailVolumeDirectory implements Name { + } + + /** + * RemoteFileStore option specification. Two choices are available: GlusterFileStore or CrailFileStore. + * Default is the former one. + */ + @NamedParameter(doc = "Option for RemoteFileStore", short_name = "remote_option", default_value = "glusterFS") + public final class RemoteFileStoreOpt implements Name { } //////////////////////////////// Client-Driver RPC From f141d7be917e972049cfe40de0a5492ed7a8eb87 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 15:29:54 +0900 Subject: [PATCH 207/235] testing crail option --- .../java/org/apache/nemo/client/JobLauncher.java | 5 +++++ .../main/java/org/apache/nemo/conf/JobConf.java | 2 +- .../java/org/apache/nemo/driver/NemoDriver.java | 14 +++++++++++++- .../executor/data/stores/RemoteFileStore.java | 3 +-- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/client/src/main/java/org/apache/nemo/client/JobLauncher.java b/client/src/main/java/org/apache/nemo/client/JobLauncher.java index e849acae3b..d3c80a8936 100644 --- a/client/src/main/java/org/apache/nemo/client/JobLauncher.java +++ b/client/src/main/java/org/apache/nemo/client/JobLauncher.java @@ -27,6 +27,8 @@ import org.apache.nemo.runtime.common.message.MessageEnvironment; import org.apache.nemo.runtime.common.message.MessageParameters; import org.apache.commons.lang3.SerializationUtils; +import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; +import org.apache.nemo.runtime.executor.data.stores.RemoteFileStore; import org.apache.nemo.runtime.master.scheduler.Scheduler; import org.apache.reef.client.DriverConfiguration; import org.apache.reef.client.DriverLauncher; @@ -397,6 +399,8 @@ public static Configuration getJobConf(final String[] args) throws IOException, cl.registerShortNameOfClass(JobConf.MaxTaskAttempt.class); cl.registerShortNameOfClass(JobConf.FileDirectory.class); cl.registerShortNameOfClass(JobConf.GlusterVolumeDirectory.class); + cl.registerShortNameOfClass(JobConf.CrailVolumeDirectory.class); + cl.registerShortNameOfClass(JobConf.RemoteFileStoreOpt.class); cl.registerShortNameOfClass(JobConf.PartitionTransportServerPort.class); cl.registerShortNameOfClass(JobConf.PartitionTransportServerBacklog.class); cl.registerShortNameOfClass(JobConf.PartitionTransportServerNumListeningThreads.class); @@ -405,6 +409,7 @@ public static Configuration getJobConf(final String[] args) throws IOException, cl.registerShortNameOfClass(JobConf.MaxNumDownloadsForARuntimeEdge.class); cl.registerShortNameOfClass(JobConf.SchedulerImplClassName.class); cl.processCommandLine(args); + if (JobConf.RemoteFileStoreOpt.class.getName() == "Crail") confBuilder.bindImplementation(RemoteFileStore.class, CrailFileStore.class); return confBuilder.build(); } diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index fbc0181f9a..966e82064e 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -88,7 +88,7 @@ public final class CrailVolumeDirectory implements Name { * RemoteFileStore option specification. Two choices are available: GlusterFileStore or CrailFileStore. * Default is the former one. */ - @NamedParameter(doc = "Option for RemoteFileStore", short_name = "remote_option", default_value = "glusterFS") + @NamedParameter(doc = "Option for RemoteFileStore", short_name = "remote_option", default_value = "Gluster") public final class RemoteFileStoreOpt implements Name { } diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index b8d5885866..2f4d6a8a0a 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -24,6 +24,8 @@ import org.apache.nemo.runtime.common.RuntimeIdManager; import org.apache.nemo.runtime.common.comm.ControlMessage; import org.apache.nemo.runtime.common.message.MessageParameters; +import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; +import org.apache.nemo.runtime.executor.data.stores.RemoteFileStore; import org.apache.nemo.runtime.master.ClientRPC; import org.apache.nemo.runtime.master.BroadcastManagerMaster; import org.apache.nemo.runtime.master.RuntimeMaster; @@ -42,6 +44,7 @@ import org.apache.reef.io.network.util.StringIdentifierFactory; import org.apache.reef.tang.Configuration; import org.apache.reef.tang.Configurations; +import org.apache.reef.tang.JavaConfigurationBuilder; import org.apache.reef.tang.Tang; import org.apache.reef.tang.annotations.Parameter; import org.apache.reef.tang.annotations.Unit; @@ -79,6 +82,7 @@ public final class NemoDriver { private final String localDirectory; private final String glusterDirectory; private final ClientRPC clientRPC; + private final String remoteOpt; private static ExecutorService runnerThread = Executors.newSingleThreadExecutor( new BasicThreadFactory.Builder().namingPattern("User App thread-%d").build()); @@ -97,7 +101,8 @@ private NemoDriver(final UserApplicationRunner userApplicationRunner, @Parameter(JobConf.BandwidthJSONContents.class) final String bandwidthString, @Parameter(JobConf.JobId.class) final String jobId, @Parameter(JobConf.FileDirectory.class) final String localDirectory, - @Parameter(JobConf.GlusterVolumeDirectory.class) final String glusterDirectory) { + @Parameter(JobConf.GlusterVolumeDirectory.class) final String glusterDirectory, + @Parameter(JobConf.RemoteFileStoreOpt.class) final String remoteOpt) { IdManager.setInDriver(); this.userApplicationRunner = userApplicationRunner; this.runtimeMaster = runtimeMaster; @@ -109,6 +114,7 @@ private NemoDriver(final UserApplicationRunner userApplicationRunner, this.glusterDirectory = glusterDirectory; this.handler = new RemoteClientMessageLoggingHandler(client); this.clientRPC = clientRPC; + this.remoteOpt = remoteOpt; // TODO #69: Support job-wide execution property ResourceSitePass.setBandwidthSpecificationString(bandwidthString); clientRPC.registerHandler(ControlMessage.ClientToDriverMessageType.LaunchDAG, message -> { @@ -232,6 +238,12 @@ private Configuration getExecutorConfiguration(final String executorId) { .set(JobConf.JOB_ID, jobId) .build(); +// final JavaConfigurationBuilder jcb = Tang.Factory.getTang().newConfigurationBuilder(); +// jcb.bindImplementation(RemoteFileStore.class, CrailFileStore.class); +// final Configuration remoteConf = jcb.build(); + + + final Configuration contextConfiguration = ContextConfiguration.CONF .set(ContextConfiguration.IDENTIFIER, executorId) // We set: contextId = executorId .set(ContextConfiguration.ON_CONTEXT_STARTED, NemoContext.ContextStartHandler.class) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java index d0e015e815..7fa6189fcc 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/RemoteFileStore.java @@ -24,7 +24,6 @@ * Interface for remote block stores (e.g., GlusterFS, CrailFS...). */ -@DefaultImplementation(CrailFileStore.class) -//@DefaultImplementation(GlusterFileStore.class) +@DefaultImplementation(GlusterFileStore.class) public interface RemoteFileStore extends BlockStore { } From 2f50495fd6a9618cb85fcaafcb2a94f83f419dc1 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 15:48:05 +0900 Subject: [PATCH 208/235] logging --- client/src/main/java/org/apache/nemo/client/JobLauncher.java | 1 + 1 file changed, 1 insertion(+) diff --git a/client/src/main/java/org/apache/nemo/client/JobLauncher.java b/client/src/main/java/org/apache/nemo/client/JobLauncher.java index d3c80a8936..cef0a053be 100644 --- a/client/src/main/java/org/apache/nemo/client/JobLauncher.java +++ b/client/src/main/java/org/apache/nemo/client/JobLauncher.java @@ -409,6 +409,7 @@ public static Configuration getJobConf(final String[] args) throws IOException, cl.registerShortNameOfClass(JobConf.MaxNumDownloadsForARuntimeEdge.class); cl.registerShortNameOfClass(JobConf.SchedulerImplClassName.class); cl.processCommandLine(args); + LOG.info("HY: {}", JobConf.RemoteFileStoreOpt.class.getName()); if (JobConf.RemoteFileStoreOpt.class.getName() == "Crail") confBuilder.bindImplementation(RemoteFileStore.class, CrailFileStore.class); return confBuilder.build(); } From 47ee30cddd193a98492826707f292b2743d5a3d2 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 15:58:05 +0900 Subject: [PATCH 209/235] logging --- client/src/main/java/org/apache/nemo/client/JobLauncher.java | 2 -- .../driver/src/main/java/org/apache/nemo/driver/NemoDriver.java | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/client/src/main/java/org/apache/nemo/client/JobLauncher.java b/client/src/main/java/org/apache/nemo/client/JobLauncher.java index cef0a053be..9f96f01b1a 100644 --- a/client/src/main/java/org/apache/nemo/client/JobLauncher.java +++ b/client/src/main/java/org/apache/nemo/client/JobLauncher.java @@ -409,8 +409,6 @@ public static Configuration getJobConf(final String[] args) throws IOException, cl.registerShortNameOfClass(JobConf.MaxNumDownloadsForARuntimeEdge.class); cl.registerShortNameOfClass(JobConf.SchedulerImplClassName.class); cl.processCommandLine(args); - LOG.info("HY: {}", JobConf.RemoteFileStoreOpt.class.getName()); - if (JobConf.RemoteFileStoreOpt.class.getName() == "Crail") confBuilder.bindImplementation(RemoteFileStore.class, CrailFileStore.class); return confBuilder.build(); } diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 2f4d6a8a0a..fe6236985f 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -115,6 +115,7 @@ private NemoDriver(final UserApplicationRunner userApplicationRunner, this.handler = new RemoteClientMessageLoggingHandler(client); this.clientRPC = clientRPC; this.remoteOpt = remoteOpt; + LOG.info("HY: {}", remoteOpt); // TODO #69: Support job-wide execution property ResourceSitePass.setBandwidthSpecificationString(bandwidthString); clientRPC.registerHandler(ControlMessage.ClientToDriverMessageType.LaunchDAG, message -> { From 5d3d7fdc119908ec13419a2e61faf8260456ba29 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:07:01 +0900 Subject: [PATCH 210/235] tests --- .../src/main/java/org/apache/nemo/driver/NemoDriver.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index fe6236985f..5abc6302c6 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -115,7 +115,6 @@ private NemoDriver(final UserApplicationRunner userApplicationRunner, this.handler = new RemoteClientMessageLoggingHandler(client); this.clientRPC = clientRPC; this.remoteOpt = remoteOpt; - LOG.info("HY: {}", remoteOpt); // TODO #69: Support job-wide execution property ResourceSitePass.setBandwidthSpecificationString(bandwidthString); clientRPC.registerHandler(ControlMessage.ClientToDriverMessageType.LaunchDAG, message -> { @@ -239,9 +238,9 @@ private Configuration getExecutorConfiguration(final String executorId) { .set(JobConf.JOB_ID, jobId) .build(); -// final JavaConfigurationBuilder jcb = Tang.Factory.getTang().newConfigurationBuilder(); -// jcb.bindImplementation(RemoteFileStore.class, CrailFileStore.class); -// final Configuration remoteConf = jcb.build(); + final JavaConfigurationBuilder jcb = Tang.Factory.getTang().newConfigurationBuilder(); + if (remoteOpt.equals("crail")) jcb.bindImplementation(RemoteFileStore.class, CrailFileStore.class); + final Configuration remoteConf = jcb.build(); @@ -254,7 +253,7 @@ private Configuration getExecutorConfiguration(final String executorId) { final Configuration ncsConfiguration = getExecutorNcsConfiguration(); final Configuration messageConfiguration = getExecutorMessageConfiguration(executorId); - return Configurations.merge(executorConfiguration, contextConfiguration, ncsConfiguration, messageConfiguration); + return Configurations.merge(executorConfiguration, contextConfiguration, ncsConfiguration, messageConfiguration, remoteConf); } private Configuration getExecutorNcsConfiguration() { From 7468fddb3fdee93bf53c74627dc983d8eb43bf2a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:15:11 +0900 Subject: [PATCH 211/235] checkstyle --- .../driver/src/main/java/org/apache/nemo/driver/NemoDriver.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index 5abc6302c6..fc6eb63b33 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -242,8 +242,6 @@ private Configuration getExecutorConfiguration(final String executorId) { if (remoteOpt.equals("crail")) jcb.bindImplementation(RemoteFileStore.class, CrailFileStore.class); final Configuration remoteConf = jcb.build(); - - final Configuration contextConfiguration = ContextConfiguration.CONF .set(ContextConfiguration.IDENTIFIER, executorId) // We set: contextId = executorId .set(ContextConfiguration.ON_CONTEXT_STARTED, NemoContext.ContextStartHandler.class) From 16de92e320b2b0b4603ccea1a7d4fd98abfd2ad4 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:22:19 +0900 Subject: [PATCH 212/235] checkstyle and tests --- client/src/main/java/org/apache/nemo/client/JobLauncher.java | 2 -- .../pass/compiletime/annotating/CrailEdgeDataStorePass.java | 2 +- .../apache/nemo/compiler/optimizer/policy/CrailPolicy.java | 2 +- .../apache/nemo/runtime/executor/data/BlockManagerWorker.java | 4 +++- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/client/src/main/java/org/apache/nemo/client/JobLauncher.java b/client/src/main/java/org/apache/nemo/client/JobLauncher.java index 9f96f01b1a..817cda0432 100644 --- a/client/src/main/java/org/apache/nemo/client/JobLauncher.java +++ b/client/src/main/java/org/apache/nemo/client/JobLauncher.java @@ -27,8 +27,6 @@ import org.apache.nemo.runtime.common.message.MessageEnvironment; import org.apache.nemo.runtime.common.message.MessageParameters; import org.apache.commons.lang3.SerializationUtils; -import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; -import org.apache.nemo.runtime.executor.data.stores.RemoteFileStore; import org.apache.nemo.runtime.master.scheduler.Scheduler; import org.apache.reef.client.DriverConfiguration; import org.apache.reef.client.DriverLauncher; diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java index a57de8abfb..b801b0a9f3 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java @@ -42,7 +42,7 @@ public CrailEdgeDataStorePass() { @Override public DAG apply(final DAG dag) { - dag.getVertices().forEach(vertex -> { // Initialize the DataStore of the DAG with GlusterFileStore. + dag.getVertices().forEach(vertex -> { // Initialize the DataStore of the DAG with CrailFileStore. final List inEdges = dag.getIncomingEdgesOf(vertex); inEdges.forEach(edge -> edge.setPropertyPermanently(DataStoreProperty.of(DataStoreProperty.Value.CrailFileStore))); diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java index 14312f3d37..b49bcf37e0 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java @@ -33,7 +33,7 @@ public final class CrailPolicy implements Policy { public static final PolicyBuilder BUILDER = new PolicyBuilder() - .registerCompileTimePass(new CrailEdgeDataStorePass()) //***확인 + .registerCompileTimePass(new CrailEdgeDataStorePass()) .registerCompileTimePass(new DefaultCompositePass()); private final Policy policy; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 3e99e7d155..3900b0315a 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -518,6 +518,7 @@ private static ControlMessage.BlockStore convertBlockStore( */ private static DataStoreProperty.Value convertBlockStore( final ControlMessage.BlockStore blockStoreType) { + LOG.info("HY: {}", blockStoreType.getClass().getName()); switch (blockStoreType) { case MEMORY: return DataStoreProperty.Value.MemoryStore; @@ -526,7 +527,8 @@ private static DataStoreProperty.Value convertBlockStore( case LOCAL_FILE: return DataStoreProperty.Value.LocalFileStore; case REMOTE_FILE: - return DataStoreProperty.Value.CrailFileStore; + if (blockStoreType.getClass().getName() == "CrailFileStore") return DataStoreProperty.Value.CrailFileStore; + else return DataStoreProperty.Value.GlusterFileStore; default: throw new UnsupportedBlockStoreException(new Exception("This block store is not yet supported")); } From ba15a5e20f21198e9954e8c3bb34172500cfa9a2 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:39:28 +0900 Subject: [PATCH 213/235] for TPC-H --- .../nemo/examples/beam/GenericSourceSink.java | 2 +- .../nemo/examples/beam/tpch/Schemas.java | 363 ++++++++++++++++++ .../apache/nemo/examples/beam/tpch/Tpch.java | 198 ++++++++++ 3 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java create mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java index 2ab09a7f32..3ebcb4cef2 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java @@ -42,7 +42,7 @@ * Helper class for handling source/sink in a generic way. * Assumes String-type PCollections. */ -final class GenericSourceSink { +public final class GenericSourceSink { /** * Default Constructor. */ diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java new file mode 100644 index 0000000000..f1bdd71971 --- /dev/null +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java @@ -0,0 +1,363 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nemo.examples.beam.tpch; + +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.schemas.Schema; + +/** + * A simple SQL application. + * (Copied and adapted from https://github.com/apache/beam/pull/6240) + */ +public final class Schemas { + /** + * Private. + */ + private Schemas() { + } + + public static final ImmutableMap COLUMN_PREFIX = ImmutableMap.builder() + .put("lineitem", "l_") + .put("customer", "c_") + .put("supplier", "s_") + .put("partsupp", "ps_") + .put("part", "p_") + .put("orders", "o_") + .put("nation", "n_") + .put("region", "r_") + .build(); + + public static final Schema STORE_SALES_SCHEMA = + Schema.builder() + .addNullableField("ss_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("ss_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("ss_item_sk", Schema.FieldType.INT64) + .addNullableField("ss_customer_sk", Schema.FieldType.STRING) + .addNullableField("ss_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ss_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ss_addr_sk", Schema.FieldType.INT64) + .addNullableField("ss_store_sk", Schema.FieldType.INT64) + .addNullableField("ss_promo_sk", Schema.FieldType.INT64) + .addNullableField("ss_ticket_number", Schema.FieldType.INT64) + .addNullableField("ss_quantity", Schema.FieldType.INT64) + .addNullableField("ss_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ss_list_price", Schema.FieldType.FLOAT) + .addNullableField("ss_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("ss_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("ss_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("ss_net_paid", Schema.FieldType.FLOAT) + .addNullableField("ss_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("ss_net_profit", Schema.FieldType.FLOAT) + .build(); + + public static final Schema DATE_DIM_SCHEMA = + Schema.builder() + .addNullableField("d_date_sk", Schema.FieldType.INT64) + .addNullableField("d_date_id", Schema.FieldType.STRING) + .addNullableField("d_date", Schema.FieldType.STRING) + .addNullableField("d_month_seq", Schema.FieldType.INT64) + .addNullableField("d_week_seq", Schema.FieldType.INT64) + .addNullableField("d_quarter_seq", Schema.FieldType.INT64) + .addNullableField("d_year", Schema.FieldType.INT64) + .addNullableField("d_dow", Schema.FieldType.INT64) + .addNullableField("d_moy", Schema.FieldType.INT64) + .addNullableField("d_dom", Schema.FieldType.INT64) + .addNullableField("d_qoy", Schema.FieldType.INT64) + .addNullableField("d_fy_year", Schema.FieldType.INT64) + .addNullableField("d_fy_quarter_seq", Schema.FieldType.INT64) + .addNullableField("d_fy_week_seq", Schema.FieldType.INT64) + .addNullableField("d_day_name", Schema.FieldType.STRING) + .addNullableField("d_quarter_name", Schema.FieldType.STRING) + .addNullableField("d_holiday", Schema.FieldType.STRING) + .addNullableField("d_weekend", Schema.FieldType.STRING) + .addNullableField("d_following_holiday", Schema.FieldType.STRING) + .addNullableField("d_first_dom", Schema.FieldType.INT64) + .addNullableField("d_last_dom", Schema.FieldType.INT64) + .addNullableField("d_same_day_ly", Schema.FieldType.INT64) + .addNullableField("d_same_day_lq", Schema.FieldType.INT64) + .addNullableField("d_current_day", Schema.FieldType.STRING) + .addNullableField("d_current_week", Schema.FieldType.STRING) + .addNullableField("d_current_month", Schema.FieldType.STRING) + .addNullableField("d_current_quarter", Schema.FieldType.STRING) + .addNullableField("d_current_year", Schema.FieldType.STRING) + .build(); + + public static final Schema ITEM_SCHEMA = + Schema.builder() + .addNullableField("i_item_sk", Schema.FieldType.INT64) + .addNullableField("i_item_id", Schema.FieldType.STRING) + .addNullableField("i_rec_start_date", Schema.FieldType.DATETIME) + .addNullableField("i_rec_end_date", Schema.FieldType.DATETIME) + .addNullableField("i_item_desc", Schema.FieldType.STRING) + .addNullableField("i_current_price", Schema.FieldType.FLOAT) + .addNullableField("i_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("i_brand_id", Schema.FieldType.INT64) + .addNullableField("i_brand", Schema.FieldType.STRING) + .addNullableField("i_class_id", Schema.FieldType.INT64) + .addNullableField("i_class", Schema.FieldType.STRING) + .addNullableField("i_category_id", Schema.FieldType.INT64) + .addNullableField("i_category", Schema.FieldType.STRING) + .addNullableField("i_manufact_id", Schema.FieldType.INT64) + .addNullableField("i_manufact", Schema.FieldType.STRING) + .addNullableField("i_size", Schema.FieldType.STRING) + .addNullableField("i_formulation", Schema.FieldType.STRING) + .addNullableField("i_color", Schema.FieldType.STRING) + .addNullableField("i_units", Schema.FieldType.STRING) + .addNullableField("i_container", Schema.FieldType.STRING) + .addNullableField("i_manager_id", Schema.FieldType.INT64) + .addNullableField("i_product_name", Schema.FieldType.STRING) + .build(); + + public static final Schema INVENTORY_SCHEMA = + Schema.builder() + .addNullableField("inv_date_sk", Schema.FieldType.INT64) + .addNullableField("inv_item_sk", Schema.FieldType.INT64) + .addNullableField("inv_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("inv_quantity_on_hand", Schema.FieldType.INT64) + .build(); + + public static final Schema CATALOG_SALES_SCHEMA = + Schema.builder() + .addNullableField("cs_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("cs_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_date_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_customer_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_bill_addr_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_customer_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_addr_sk", Schema.FieldType.INT64) + .addNullableField("cs_call_center_sk", Schema.FieldType.INT64) + .addNullableField("cs_catalog_page_sk", Schema.FieldType.INT64) + .addNullableField("cs_ship_mode_sk", Schema.FieldType.INT64) + .addNullableField("cs_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("cs_item_sk", Schema.FieldType.INT64) + .addNullableField("cs_promo_sk", Schema.FieldType.INT64) + .addNullableField("cs_order_number", Schema.FieldType.INT64) + .addNullableField("cs_quantity", Schema.FieldType.INT64) + .addNullableField("cs_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_list_price", Schema.FieldType.FLOAT) + .addNullableField("cs_sales_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("cs_ext_ship_cost", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_ship", Schema.FieldType.FLOAT) + .addNullableField("cs_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) + .addNullableField("cs_net_profit", Schema.FieldType.FLOAT) + .build(); + + public static final Schema ORDER_SCHEMA = + Schema.builder() + .addInt64Field("o_orderkey") + .addInt64Field("o_custkey") + .addStringField("o_orderstatus") + .addFloatField("o_totalprice") + .addStringField("o_orderdate") + .addStringField("o_orderpriority") + .addStringField("o_clerk") + .addInt64Field("o_shippriority") + .addStringField("o_comment") + .build(); + + public static final Schema CUSTOMER_SCHEMA = + Schema.builder() + .addInt64Field("c_custkey") + .addStringField("c_name") + .addStringField("c_address") + .addInt64Field("c_nationkey") + .addStringField("c_phone") + .addFloatField("c_acctbal") + .addStringField("c_mktsegment") + .addStringField("c_comment") + .build(); + + public static final Schema CUSTOMER_DS_SCHEMA = + Schema.builder() + .addNullableField("c_customer_sk", Schema.FieldType.INT64) + .addNullableField("c_customer_id", Schema.FieldType.STRING) + .addNullableField("c_current_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("c_current_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("c_current_addr_sk", Schema.FieldType.INT64) + .addNullableField("c_first_shipto_date_sk", Schema.FieldType.INT64) + .addNullableField("c_first_sales_date_sk", Schema.FieldType.INT64) + .addNullableField("c_salutation", Schema.FieldType.STRING) + .addNullableField("c_first_name", Schema.FieldType.STRING) + .addNullableField("c_last_name", Schema.FieldType.STRING) + .addNullableField("c_preferred_cust_flag", Schema.FieldType.STRING) + .addNullableField("c_birth_day", Schema.FieldType.INT64) + .addNullableField("c_birth_month", Schema.FieldType.INT64) + .addNullableField("c_birth_year", Schema.FieldType.INT64) + .addNullableField("c_birth_country", Schema.FieldType.STRING) + .addNullableField("c_login", Schema.FieldType.STRING) + .addNullableField("c_email_address", Schema.FieldType.STRING) + .addNullableField("c_last_review_date", Schema.FieldType.STRING) + .build(); + + public static final Schema LINEITEM_SCHEMA = + Schema.builder() + .addInt64Field("l_orderkey") + .addInt64Field("l_partkey") + .addInt64Field("l_suppkey") + .addInt64Field("l_linenumber") + .addFloatField("l_quantity") + .addFloatField("l_extendedprice") + .addFloatField("l_discount") + .addFloatField("l_tax") + .addStringField("l_returnflag") + .addStringField("l_linestatus") + .addStringField("l_shipdate") + .addStringField("l_commitdate") + .addStringField("l_receiptdate") + .addStringField("l_shipinstruct") + .addStringField("l_shipmode") + .addStringField("l_comment") + .build(); + + public static final Schema PARTSUPP_SCHEMA = + Schema.builder() + .addInt64Field("ps_partkey") + .addInt64Field("ps_suppkey") + .addInt64Field("ps_availqty") + .addFloatField("ps_supplycost") + .addStringField("ps_comment") + .build(); + + public static final Schema REGION_SCHEMA = + Schema.builder() + .addInt64Field("r_regionkey") + .addStringField("r_name") + .addStringField("r_comment") + .build(); + + public static final Schema SUPPLIER_SCHEMA = + Schema.builder() + .addInt64Field("s_suppkey") + .addStringField("s_name") + .addStringField("s_address") + .addInt64Field("s_nationkey") + .addStringField("s_phone") + .addFloatField("s_acctbal") + .addStringField("s_comment") + .build(); + + public static final Schema PART_SCHEMA = + Schema.builder() + .addInt64Field("p_partkey") + .addStringField("p_name") + .addStringField("p_mfgr") + .addStringField("p_brand") + .addStringField("p_type") + .addInt64Field("p_size") + .addStringField("p_container") + .addFloatField("p_retailprice") + .addStringField("p_comment") + .build(); + + public static final Schema NATION_SCHEMA = + Schema.builder() + .addInt64Field("n_nationkey") + .addStringField("n_name") + .addInt64Field("n_regionkey") + .addStringField("n_comment") + .build(); + + public static final Schema PROMOTION_SCHEMA = + Schema.builder() + .addNullableField("p_promo_sk", Schema.FieldType.INT64) + .addNullableField("p_promo_id", Schema.FieldType.STRING) + .addNullableField("p_start_date_sk", Schema.FieldType.INT64) + .addNullableField("p_end_date_sk", Schema.FieldType.INT64) + .addNullableField("p_item_sk", Schema.FieldType.INT64) + .addNullableField("p_cost", Schema.FieldType.FLOAT) + .addNullableField("p_response_target", Schema.FieldType.INT64) + .addNullableField("p_promo_name", Schema.FieldType.STRING) + .addNullableField("p_channel_dmail", Schema.FieldType.STRING) + .addNullableField("p_channel_email", Schema.FieldType.STRING) + .addNullableField("p_channel_catalog", Schema.FieldType.STRING) + .addNullableField("p_channel_tv", Schema.FieldType.STRING) + .addNullableField("p_channel_radio", Schema.FieldType.STRING) + .addNullableField("p_channel_press", Schema.FieldType.STRING) + .addNullableField("p_channel_event", Schema.FieldType.STRING) + .addNullableField("p_channel_demo", Schema.FieldType.STRING) + .addNullableField("p_channel_details", Schema.FieldType.STRING) + .addNullableField("p_purpose", Schema.FieldType.STRING) + .addNullableField("p_discount_active", Schema.FieldType.STRING) + .build(); + + public static final Schema CUSTOMER_DEMOGRAPHIC_SCHEMA = + Schema.builder() + .addNullableField("cd_demo_sk", Schema.FieldType.INT64) + .addNullableField("cd_gender", Schema.FieldType.STRING) + .addNullableField("cd_marital_status", Schema.FieldType.STRING) + .addNullableField("cd_education_status", Schema.FieldType.STRING) + .addNullableField("cd_purchase_estimate", Schema.FieldType.INT64) + .addNullableField("cd_credit_rating", Schema.FieldType.STRING) + .addNullableField("cd_dep_count", Schema.FieldType.INT64) + .addNullableField("cd_dep_employed_count", Schema.FieldType.INT64) + .addNullableField("cd_dep_college_count", Schema.FieldType.INT64) + .build(); + + public static final Schema WEB_SALES_SCHEMA = + Schema.builder() + .addNullableField("ws_sold_date_sk", Schema.FieldType.INT64) + .addNullableField("ws_sold_time_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_date_sk", Schema.FieldType.INT64) + .addNullableField("ws_item_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_customer_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_bill_addr_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_customer_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_cdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_hdemo_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_addr_sk", Schema.FieldType.INT64) + .addNullableField("ws_web_page_sk", Schema.FieldType.INT64) + .addNullableField("ws_web_site_sk", Schema.FieldType.INT64) + .addNullableField("ws_ship_mode_sk", Schema.FieldType.INT64) + .addNullableField("ws_warehouse_sk", Schema.FieldType.INT64) + .addNullableField("ws_promo_sk", Schema.FieldType.INT64) + .addNullableField("ws_order_number", Schema.FieldType.INT64) + .addNullableField("ws_quantity", Schema.FieldType.INT64) + .addNullableField("ws_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_list_price", Schema.FieldType.FLOAT) + .addNullableField("ws_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_discount_amt", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_sales_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_wholesale_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_list_price", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_coupon_amt", Schema.FieldType.FLOAT) + .addNullableField("ws_ext_ship_cost", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_ship", Schema.FieldType.FLOAT) + .addNullableField("ws_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) + .addNullableField("ws_net_profit", Schema.FieldType.FLOAT) + .build(); +} diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java new file mode 100644 index 0000000000..bd6a7041a1 --- /dev/null +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nemo.examples.beam.tpch; + +import com.google.common.collect.ImmutableMap; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.RowCoder; +import org.apache.beam.sdk.extensions.sql.SqlTransform; +import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.values.*; +import org.apache.commons.csv.CSVFormat; +import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; +import org.apache.nemo.client.beam.NemoRunner; +import org.apache.nemo.examples.beam.GenericSourceSink; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Stream; + +import static org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.beamRow2CsvLine; + +/** + * A simple SQL application. + * (Copied and adapted from https://github.com/apache/beam/pull/6240) + */ +public final class Tpch { + private static final Logger LOG = LoggerFactory.getLogger(Tpch.class.getName()); + + /** + * Private Constructor. + */ + private Tpch() { + } + + /** + * Row csv formats. + */ + static class RowToCsv extends PTransform, PCollection> implements Serializable { + + private final CSVFormat csvFormat; + + RowToCsv(final CSVFormat csvFormat) { + this.csvFormat = csvFormat; + } + + public CSVFormat getCsvFormat() { + return csvFormat; + } + + @Override + public PCollection expand(final PCollection input) { + return input.apply( + "rowToCsv", + MapElements.into(TypeDescriptors.strings()).via(row -> beamRow2CsvLine(row, csvFormat))); + } + } + + private static PCollectionTuple getHTables(final Pipeline pipeline, + final CSVFormat csvFormat, + final String inputDirectory, + final String query) { + final ImmutableMap hSchemas = ImmutableMap.builder() + .put("lineitem", Schemas.LINEITEM_SCHEMA) + .put("customer", Schemas.CUSTOMER_SCHEMA) + .put("orders", Schemas.ORDER_SCHEMA) + + .put("supplier", Schemas.SUPPLIER_SCHEMA) + .put("nation", Schemas.NATION_SCHEMA) + .put("region", Schemas.REGION_SCHEMA) + + .put("part", Schemas.PART_SCHEMA) + .put("partsupp", Schemas.PARTSUPP_SCHEMA) + /* + .put("store_sales", Schemas.STORE_SALES_SCHEMA) + .put("catalog_sales", Schemas.CATALOG_SALES_SCHEMA) + .put("item", Schemas.ITEM_SCHEMA) + .put("date_dim", Schemas.DATE_DIM_SCHEMA) + .put("promotion", Schemas.PROMOTION_SCHEMA) + .put("customer_demographics", Schemas.CUSTOMER_DEMOGRAPHIC_SCHEMA) + .put("web_sales", Schemas.WEB_SALES_SCHEMA) + .put("inventory", Schemas.INVENTORY_SCHEMA) + */ + .build(); + + PCollectionTuple tables = PCollectionTuple.empty(pipeline); + for (final Map.Entry tableSchema : hSchemas.entrySet()) { + final String tableName = tableSchema.getKey(); + + if (query.contains(tableName)) { + LOG.info("HIT: tablename {}", tableName); + + final String filePattern = inputDirectory + tableSchema.getKey() + ".tbl*"; + final PCollection table = GenericSourceSink.read(pipeline, filePattern) + .apply("StringToRow", new TextTableProvider.CsvToRow(tableSchema.getValue(), csvFormat)) + .setCoder(RowCoder.of(tableSchema.getValue())) + .setRowSchema(tableSchema.getValue()) + .setName(tableSchema.getKey()); + tables = tables.and(new TupleTag<>(tableSchema.getKey()), table); + + LOG.info("FilePattern {} / Tables {}", filePattern, tables); + } + } + return tables; + } + + + /** + * @param args arguments. + */ + public static void main(final String[] args) { + final String queryFilePath = args[0]; + final String inputDirectory = args[1]; + final String outputFilePath = args[2]; + + LOG.info("{} / {} / {}", queryFilePath, inputDirectory, outputFilePath); + + final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class); + options.setRunner(NemoRunner.class); + options.setJobName("TPC-H"); + final Pipeline p = Pipeline.create(options); + + final String queryString = getQueryString(queryFilePath); + // Create tables + final CSVFormat csvFormat = CSVFormat.MYSQL + .withDelimiter('|') + .withNullString("") + .withTrailingDelimiter(); + final PCollectionTuple tables = getHTables(p, csvFormat, inputDirectory, queryString); + + // Run the TPC-H query + final PCollection result = tables.apply(SqlTransform.query(queryString)); + + final PCollection resultToWrite = result.apply(MapElements.into(TypeDescriptors.strings()).via( + new SerializableFunction() { + @Override + public String apply(final Row input) { + System.out.println(input.getValues().toString()); + return input.getValues().toString(); + } + })); + + GenericSourceSink.write(resultToWrite, outputFilePath); + + // Then run + p.run(); + } + + private static String getQueryString(final String queryFilePath) { + final List lines = new ArrayList<>(); + try (final Stream stream = Files.lines(Paths.get(queryFilePath))) { + stream.forEach(lines::add); + } catch (IOException e) { + throw new RuntimeException(e); + } + + System.out.println(lines); + + final StringBuilder sb = new StringBuilder(); + lines.forEach(line -> { + sb.append(" "); + sb.append(line); + }); + + final String concate = sb.toString(); + System.out.println(concate); + final String cleanOne = concate.replaceAll("\n", " "); + System.out.println(cleanOne); + final String cleanTwo = cleanOne.replaceAll("\t", " "); + System.out.println(cleanTwo); + + return cleanTwo; + } +} From bd5458441f0c428c4342dfa93ae072d9e00eb7f0 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:42:09 +0900 Subject: [PATCH 214/235] tpch --- .../apache/nemo/examples/beam/tpch/Tpch.java | 2 +- .../runtime/executor/MemoryPoolAssigner.java | 204 ++++++++++++++++++ 2 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java index bd6a7041a1..816549d794 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -30,7 +30,7 @@ import org.apache.beam.sdk.values.*; import org.apache.commons.csv.CSVFormat; import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; -import org.apache.nemo.client.beam.NemoRunner; +import org.apache.nemo.compiler.frontend.beam.NemoRunner; import org.apache.nemo.examples.beam.GenericSourceSink; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java new file mode 100644 index 0000000000..d474828309 --- /dev/null +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.nemo.runtime.executor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; + +/** + * The MemoryPoolAssigner assigns the memory that Nemo uses for writing data blocks from the {@link MemoryPool}. + * Memory is represented in chunks of equal size. Consumers of off-heap memory acquire the memory by requesting + * a number of {@link MemoryChunk} they need. + * + * MemoryPoolAssigner currently supports allocation of off-heap memory only. + * + * The MemoryPoolAssigner pre-allocates all memory at the start. Memory will be occupied and reserved from start on, + * which means that no OutOfMemoryError comes while requesting memory. Released memory will return to the MemoryPool. + */ +public class MemoryPoolAssigner { + + private static final Logger LOG = LoggerFactory.getLogger(MemoryPoolAssigner.class.getName()); + public static final int DEFAULT_PAGE_SIZE = 32 * 1024; + public static final int MIN_PAGE_SIZE = 4 * 1024; + private final MemoryPool memoryPool; + private final int pageSize; + private final long memorySize; + + + public MemoryPoolAssigner(long memorySize) { + this(memorySize, DEFAULT_PAGE_SIZE); + } + + public MemoryPoolAssigner(long memorySize, int pageSize) { + this.memorySize = memorySize; + this.pageSize = pageSize; + final long numPages = memorySize / pageSize; + if (numPages > Integer.MAX_VALUE) { + throw new IllegalArgumentException("The given number of memory bytes (" + memorySize + + ") corresponds to more than MAX_INT pages."); + } + + final int totalNumPages = (int) numPages; + if (totalNumPages < 1) { + throw new IllegalArgumentException("The given amount of memory amounted to less than one page."); + } + + this.memoryPool = new MemoryPool(totalNumPages, pageSize); + } + + public List allocatePages(int numPages) throws MemoryAllocationException { + final ArrayList segs = new ArrayList(numPages); + allocatePages(segs, numPages); + return segs; + } + + public void allocatePages(List target, int numPages) + throws MemoryAllocationException { + + if (numPages > (memoryPool.getNumOfAvailableMemoryChunks())) { + throw new MemoryAllocationException("Could not allocate " + numPages + " pages. Only " + + (memoryPool.getNumOfAvailableMemoryChunks()) + + " pages are remaining."); + } + + for (int i = numPages; i > 0; i--) { + MemoryChunk chunk = memoryPool.requestChunkFromPool(); + target.add(chunk); + } + } + + +// abstract static class MemoryPool { +// abstract int getNumOfAvailableMemoryChunks(); +// +// abstract MemoryChunk allocateNewChunk(); +// +// abstract MemoryChunk requestChunkFromPool(); +// +// abstract void returnChunkToPool(MemoryChunk segment); +// +// abstract void clear(); +// } + + /** + static final class HeapMemoryPool extends MemoryPool { + + private final ConcurrentLinkedQueue available; + private final int chunkSize; + + + HeapMemoryPool(final int numInitialChunks, final int chunkSize) { + this.chunkSize = chunkSize; + this.available = new ConcurrentLinkedQueue<>(); + + for (int i = 0; i < numInitialChunks; i++) { + this.available.add(ByteBuffer.allocate(chunkSize)); + } + } + + @Override + int getNumOfAvailableMemoryChunks() { + return this.available.size(); + } + + @Override + MemoryChunk allocateNewChunk() { + ByteBuffer memory = ByteBuffer.allocate(chunkSize); + return new MemoryChunk(memory); + } + + @Override + MemoryChunk requestChunkFromPool() { + + } + + abstract void returnChunkToPool(MemoryChunk segment); + + abstract void clear(); + + }*/ + + /** + * + * Supports both on-heap and off-heap memory pool. + * off-heap is pre-allocated and managed. on-heap memory is used when off-heap memory runs out. + * + */ + static final class MemoryPool { + + private final ConcurrentLinkedQueue available; + private final int chunkSize; + + MemoryPool(final int numInitialChunks, final int chunkSize) { + this.chunkSize = chunkSize; + this.available = new ConcurrentLinkedQueue<>(); + + /** Pre-allocation of off-heap memory*/ + for (int i = 0; i < numInitialChunks; i++) { + this.available.add(ByteBuffer.allocateDirect(chunkSize)); + } + } + + MemoryChunk allocateNewOffHeapChunk() { + ByteBuffer memory = ByteBuffer.allocateDirect(chunkSize); + return new MemoryChunk(memory); + } + + /** + * Used when there is no available buffer in the pool. + * @return + */ + MemoryChunk allocateNewOnHeapChunk() { + ByteBuffer memory = ByteBuffer.allocate(chunkSize); + return new MemoryChunk(memory); + } + + MemoryChunk requestChunkFromPool() { + ByteBuffer buf = available.remove(); + return new MemoryChunk(buf); + } + + /** + * Only off-heap chunk is returned to the pool. + * On-heap chunk is not managed as a pool actually. + * @param chunk + */ + void returnChunkToPool(MemoryChunk chunk) { + MemoryChunk offHeapChunk = chunk; + ByteBuffer buf = offHeapChunk.getBuffer(); + available.add(buf); + chunk.free(); + } + + protected int getNumOfAvailableMemoryChunks() { + return available.size(); + } + + void clear() { + available.clear(); + } + } +} From 795da8338265e8b3906b30e73f6cec1674e8d97e Mon Sep 17 00:00:00 2001 From: hy00nc Date: Mon, 10 Jun 2019 07:43:33 +0000 Subject: [PATCH 215/235] Revert "tpch" This reverts commit bd5458441f0c428c4342dfa93ae072d9e00eb7f0. --- .../apache/nemo/examples/beam/tpch/Tpch.java | 2 +- .../runtime/executor/MemoryPoolAssigner.java | 204 ------------------ 2 files changed, 1 insertion(+), 205 deletions(-) delete mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java index 816549d794..bd6a7041a1 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -30,7 +30,7 @@ import org.apache.beam.sdk.values.*; import org.apache.commons.csv.CSVFormat; import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; -import org.apache.nemo.compiler.frontend.beam.NemoRunner; +import org.apache.nemo.client.beam.NemoRunner; import org.apache.nemo.examples.beam.GenericSourceSink; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java deleted file mode 100644 index d474828309..0000000000 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/MemoryPoolAssigner.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.nemo.runtime.executor; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ConcurrentLinkedQueue; - -/** - * The MemoryPoolAssigner assigns the memory that Nemo uses for writing data blocks from the {@link MemoryPool}. - * Memory is represented in chunks of equal size. Consumers of off-heap memory acquire the memory by requesting - * a number of {@link MemoryChunk} they need. - * - * MemoryPoolAssigner currently supports allocation of off-heap memory only. - * - * The MemoryPoolAssigner pre-allocates all memory at the start. Memory will be occupied and reserved from start on, - * which means that no OutOfMemoryError comes while requesting memory. Released memory will return to the MemoryPool. - */ -public class MemoryPoolAssigner { - - private static final Logger LOG = LoggerFactory.getLogger(MemoryPoolAssigner.class.getName()); - public static final int DEFAULT_PAGE_SIZE = 32 * 1024; - public static final int MIN_PAGE_SIZE = 4 * 1024; - private final MemoryPool memoryPool; - private final int pageSize; - private final long memorySize; - - - public MemoryPoolAssigner(long memorySize) { - this(memorySize, DEFAULT_PAGE_SIZE); - } - - public MemoryPoolAssigner(long memorySize, int pageSize) { - this.memorySize = memorySize; - this.pageSize = pageSize; - final long numPages = memorySize / pageSize; - if (numPages > Integer.MAX_VALUE) { - throw new IllegalArgumentException("The given number of memory bytes (" + memorySize - + ") corresponds to more than MAX_INT pages."); - } - - final int totalNumPages = (int) numPages; - if (totalNumPages < 1) { - throw new IllegalArgumentException("The given amount of memory amounted to less than one page."); - } - - this.memoryPool = new MemoryPool(totalNumPages, pageSize); - } - - public List allocatePages(int numPages) throws MemoryAllocationException { - final ArrayList segs = new ArrayList(numPages); - allocatePages(segs, numPages); - return segs; - } - - public void allocatePages(List target, int numPages) - throws MemoryAllocationException { - - if (numPages > (memoryPool.getNumOfAvailableMemoryChunks())) { - throw new MemoryAllocationException("Could not allocate " + numPages + " pages. Only " + - (memoryPool.getNumOfAvailableMemoryChunks()) - + " pages are remaining."); - } - - for (int i = numPages; i > 0; i--) { - MemoryChunk chunk = memoryPool.requestChunkFromPool(); - target.add(chunk); - } - } - - -// abstract static class MemoryPool { -// abstract int getNumOfAvailableMemoryChunks(); -// -// abstract MemoryChunk allocateNewChunk(); -// -// abstract MemoryChunk requestChunkFromPool(); -// -// abstract void returnChunkToPool(MemoryChunk segment); -// -// abstract void clear(); -// } - - /** - static final class HeapMemoryPool extends MemoryPool { - - private final ConcurrentLinkedQueue available; - private final int chunkSize; - - - HeapMemoryPool(final int numInitialChunks, final int chunkSize) { - this.chunkSize = chunkSize; - this.available = new ConcurrentLinkedQueue<>(); - - for (int i = 0; i < numInitialChunks; i++) { - this.available.add(ByteBuffer.allocate(chunkSize)); - } - } - - @Override - int getNumOfAvailableMemoryChunks() { - return this.available.size(); - } - - @Override - MemoryChunk allocateNewChunk() { - ByteBuffer memory = ByteBuffer.allocate(chunkSize); - return new MemoryChunk(memory); - } - - @Override - MemoryChunk requestChunkFromPool() { - - } - - abstract void returnChunkToPool(MemoryChunk segment); - - abstract void clear(); - - }*/ - - /** - * - * Supports both on-heap and off-heap memory pool. - * off-heap is pre-allocated and managed. on-heap memory is used when off-heap memory runs out. - * - */ - static final class MemoryPool { - - private final ConcurrentLinkedQueue available; - private final int chunkSize; - - MemoryPool(final int numInitialChunks, final int chunkSize) { - this.chunkSize = chunkSize; - this.available = new ConcurrentLinkedQueue<>(); - - /** Pre-allocation of off-heap memory*/ - for (int i = 0; i < numInitialChunks; i++) { - this.available.add(ByteBuffer.allocateDirect(chunkSize)); - } - } - - MemoryChunk allocateNewOffHeapChunk() { - ByteBuffer memory = ByteBuffer.allocateDirect(chunkSize); - return new MemoryChunk(memory); - } - - /** - * Used when there is no available buffer in the pool. - * @return - */ - MemoryChunk allocateNewOnHeapChunk() { - ByteBuffer memory = ByteBuffer.allocate(chunkSize); - return new MemoryChunk(memory); - } - - MemoryChunk requestChunkFromPool() { - ByteBuffer buf = available.remove(); - return new MemoryChunk(buf); - } - - /** - * Only off-heap chunk is returned to the pool. - * On-heap chunk is not managed as a pool actually. - * @param chunk - */ - void returnChunkToPool(MemoryChunk chunk) { - MemoryChunk offHeapChunk = chunk; - ByteBuffer buf = offHeapChunk.getBuffer(); - available.add(buf); - chunk.free(); - } - - protected int getNumOfAvailableMemoryChunks() { - return available.size(); - } - - void clear() { - available.clear(); - } - } -} From ae04605d1a3560bfec893df49574b301338edf20 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:46:52 +0900 Subject: [PATCH 216/235] tpch --- .../src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java index bd6a7041a1..816549d794 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -30,7 +30,7 @@ import org.apache.beam.sdk.values.*; import org.apache.commons.csv.CSVFormat; import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; -import org.apache.nemo.client.beam.NemoRunner; +import org.apache.nemo.compiler.frontend.beam.NemoRunner; import org.apache.nemo.examples.beam.GenericSourceSink; import org.slf4j.Logger; import org.slf4j.LoggerFactory; From d54ba3cfa16c34032ad70276e665056e78f2973d Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 16:51:31 +0900 Subject: [PATCH 217/235] this is the right tpch :) --- .../main/java/org/apache/nemo/examples/beam/tpch/Tpch.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java index 816549d794..847d896673 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java @@ -18,7 +18,6 @@ import com.google.common.collect.ImmutableMap; import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.RowCoder; import org.apache.beam.sdk.extensions.sql.SqlTransform; import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider; import org.apache.beam.sdk.options.PipelineOptions; @@ -117,8 +116,7 @@ private static PCollectionTuple getHTables(final Pipeline pipeline, final String filePattern = inputDirectory + tableSchema.getKey() + ".tbl*"; final PCollection table = GenericSourceSink.read(pipeline, filePattern) .apply("StringToRow", new TextTableProvider.CsvToRow(tableSchema.getValue(), csvFormat)) - .setCoder(RowCoder.of(tableSchema.getValue())) - .setRowSchema(tableSchema.getValue()) + .setCoder(tableSchema.getValue().getRowCoder()) .setName(tableSchema.getKey()); tables = tables.and(new TupleTag<>(tableSchema.getKey()), table); From f838cdc93f0c2fd94d7e36ac76e6fcbd3a7d5c3b Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 17:40:25 +0900 Subject: [PATCH 218/235] gluster and crail conflict issue... --- .../nemo/runtime/executor/data/BlockManagerWorker.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 3900b0315a..2c4d22bb60 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -478,7 +478,7 @@ private BlockStore getBlockStore(final DataStoreProperty.Value blockStore) { case LocalFileStore: return localFileStore; case GlusterFileStore: - return remoteFileStore; + return localFileStore; case CrailFileStore: return remoteFileStore; default: @@ -502,7 +502,7 @@ private static ControlMessage.BlockStore convertBlockStore( case LocalFileStore: return ControlMessage.BlockStore.LOCAL_FILE; case GlusterFileStore: - return ControlMessage.BlockStore.REMOTE_FILE; + return ControlMessage.BlockStore.LOCAL_FILE; //since it is treated the same way as LOCAL_FILE case CrailFileStore: return ControlMessage.BlockStore.REMOTE_FILE; default: @@ -518,7 +518,7 @@ private static ControlMessage.BlockStore convertBlockStore( */ private static DataStoreProperty.Value convertBlockStore( final ControlMessage.BlockStore blockStoreType) { - LOG.info("HY: {}", blockStoreType.getClass().getName()); + LOG.info("HY: {}", blockStoreType.getClass()); switch (blockStoreType) { case MEMORY: return DataStoreProperty.Value.MemoryStore; @@ -527,8 +527,7 @@ private static DataStoreProperty.Value convertBlockStore( case LOCAL_FILE: return DataStoreProperty.Value.LocalFileStore; case REMOTE_FILE: - if (blockStoreType.getClass().getName() == "CrailFileStore") return DataStoreProperty.Value.CrailFileStore; - else return DataStoreProperty.Value.GlusterFileStore; + return DataStoreProperty.Value.CrailFileStore; default: throw new UnsupportedBlockStoreException(new Exception("This block store is not yet supported")); } From ac40d0d12904516fc9a5b9b389e5f713cd2f5ebb Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 17:56:35 +0900 Subject: [PATCH 219/235] back to original --- .../org/apache/nemo/runtime/executor/data/BlockStoreTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/test/java/org/apache/nemo/runtime/executor/data/BlockStoreTest.java b/runtime/executor/src/test/java/org/apache/nemo/runtime/executor/data/BlockStoreTest.java index 6d306ffa9e..f865f03f42 100644 --- a/runtime/executor/src/test/java/org/apache/nemo/runtime/executor/data/BlockStoreTest.java +++ b/runtime/executor/src/test/java/org/apache/nemo/runtime/executor/data/BlockStoreTest.java @@ -275,7 +275,7 @@ public void testGlusterFileStore() throws Exception { FileUtils.deleteDirectory(new File(TMP_FILE_DIRECTORY)); } - private RemoteFileStore createGlusterFileStore(final String executorId) + private GlusterFileStore createGlusterFileStore(final String executorId) throws InjectionException { final Injector injector = LocalMessageEnvironment.forkInjector(baseInjector, executorId); injector.bindVolatileParameter(JobConf.GlusterVolumeDirectory.class, TMP_FILE_DIRECTORY); From 231130caff0018f3343d2a746eef0631d407aa2a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 10 Jun 2019 17:58:57 +0900 Subject: [PATCH 220/235] revert tpch --- .../nemo/examples/beam/GenericSourceSink.java | 2 +- .../nemo/examples/beam/tpch/Schemas.java | 363 ------------------ .../apache/nemo/examples/beam/tpch/Tpch.java | 196 ---------- 3 files changed, 1 insertion(+), 560 deletions(-) delete mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java delete mode 100644 examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java index 3ebcb4cef2..2ab09a7f32 100644 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java +++ b/examples/beam/src/main/java/org/apache/nemo/examples/beam/GenericSourceSink.java @@ -42,7 +42,7 @@ * Helper class for handling source/sink in a generic way. * Assumes String-type PCollections. */ -public final class GenericSourceSink { +final class GenericSourceSink { /** * Default Constructor. */ diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java deleted file mode 100644 index f1bdd71971..0000000000 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Schemas.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nemo.examples.beam.tpch; - -import com.google.common.collect.ImmutableMap; -import org.apache.beam.sdk.schemas.Schema; - -/** - * A simple SQL application. - * (Copied and adapted from https://github.com/apache/beam/pull/6240) - */ -public final class Schemas { - /** - * Private. - */ - private Schemas() { - } - - public static final ImmutableMap COLUMN_PREFIX = ImmutableMap.builder() - .put("lineitem", "l_") - .put("customer", "c_") - .put("supplier", "s_") - .put("partsupp", "ps_") - .put("part", "p_") - .put("orders", "o_") - .put("nation", "n_") - .put("region", "r_") - .build(); - - public static final Schema STORE_SALES_SCHEMA = - Schema.builder() - .addNullableField("ss_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("ss_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("ss_item_sk", Schema.FieldType.INT64) - .addNullableField("ss_customer_sk", Schema.FieldType.STRING) - .addNullableField("ss_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ss_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ss_addr_sk", Schema.FieldType.INT64) - .addNullableField("ss_store_sk", Schema.FieldType.INT64) - .addNullableField("ss_promo_sk", Schema.FieldType.INT64) - .addNullableField("ss_ticket_number", Schema.FieldType.INT64) - .addNullableField("ss_quantity", Schema.FieldType.INT64) - .addNullableField("ss_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ss_list_price", Schema.FieldType.FLOAT) - .addNullableField("ss_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("ss_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("ss_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("ss_net_paid", Schema.FieldType.FLOAT) - .addNullableField("ss_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("ss_net_profit", Schema.FieldType.FLOAT) - .build(); - - public static final Schema DATE_DIM_SCHEMA = - Schema.builder() - .addNullableField("d_date_sk", Schema.FieldType.INT64) - .addNullableField("d_date_id", Schema.FieldType.STRING) - .addNullableField("d_date", Schema.FieldType.STRING) - .addNullableField("d_month_seq", Schema.FieldType.INT64) - .addNullableField("d_week_seq", Schema.FieldType.INT64) - .addNullableField("d_quarter_seq", Schema.FieldType.INT64) - .addNullableField("d_year", Schema.FieldType.INT64) - .addNullableField("d_dow", Schema.FieldType.INT64) - .addNullableField("d_moy", Schema.FieldType.INT64) - .addNullableField("d_dom", Schema.FieldType.INT64) - .addNullableField("d_qoy", Schema.FieldType.INT64) - .addNullableField("d_fy_year", Schema.FieldType.INT64) - .addNullableField("d_fy_quarter_seq", Schema.FieldType.INT64) - .addNullableField("d_fy_week_seq", Schema.FieldType.INT64) - .addNullableField("d_day_name", Schema.FieldType.STRING) - .addNullableField("d_quarter_name", Schema.FieldType.STRING) - .addNullableField("d_holiday", Schema.FieldType.STRING) - .addNullableField("d_weekend", Schema.FieldType.STRING) - .addNullableField("d_following_holiday", Schema.FieldType.STRING) - .addNullableField("d_first_dom", Schema.FieldType.INT64) - .addNullableField("d_last_dom", Schema.FieldType.INT64) - .addNullableField("d_same_day_ly", Schema.FieldType.INT64) - .addNullableField("d_same_day_lq", Schema.FieldType.INT64) - .addNullableField("d_current_day", Schema.FieldType.STRING) - .addNullableField("d_current_week", Schema.FieldType.STRING) - .addNullableField("d_current_month", Schema.FieldType.STRING) - .addNullableField("d_current_quarter", Schema.FieldType.STRING) - .addNullableField("d_current_year", Schema.FieldType.STRING) - .build(); - - public static final Schema ITEM_SCHEMA = - Schema.builder() - .addNullableField("i_item_sk", Schema.FieldType.INT64) - .addNullableField("i_item_id", Schema.FieldType.STRING) - .addNullableField("i_rec_start_date", Schema.FieldType.DATETIME) - .addNullableField("i_rec_end_date", Schema.FieldType.DATETIME) - .addNullableField("i_item_desc", Schema.FieldType.STRING) - .addNullableField("i_current_price", Schema.FieldType.FLOAT) - .addNullableField("i_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("i_brand_id", Schema.FieldType.INT64) - .addNullableField("i_brand", Schema.FieldType.STRING) - .addNullableField("i_class_id", Schema.FieldType.INT64) - .addNullableField("i_class", Schema.FieldType.STRING) - .addNullableField("i_category_id", Schema.FieldType.INT64) - .addNullableField("i_category", Schema.FieldType.STRING) - .addNullableField("i_manufact_id", Schema.FieldType.INT64) - .addNullableField("i_manufact", Schema.FieldType.STRING) - .addNullableField("i_size", Schema.FieldType.STRING) - .addNullableField("i_formulation", Schema.FieldType.STRING) - .addNullableField("i_color", Schema.FieldType.STRING) - .addNullableField("i_units", Schema.FieldType.STRING) - .addNullableField("i_container", Schema.FieldType.STRING) - .addNullableField("i_manager_id", Schema.FieldType.INT64) - .addNullableField("i_product_name", Schema.FieldType.STRING) - .build(); - - public static final Schema INVENTORY_SCHEMA = - Schema.builder() - .addNullableField("inv_date_sk", Schema.FieldType.INT64) - .addNullableField("inv_item_sk", Schema.FieldType.INT64) - .addNullableField("inv_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("inv_quantity_on_hand", Schema.FieldType.INT64) - .build(); - - public static final Schema CATALOG_SALES_SCHEMA = - Schema.builder() - .addNullableField("cs_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("cs_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_date_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_customer_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_bill_addr_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_customer_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_addr_sk", Schema.FieldType.INT64) - .addNullableField("cs_call_center_sk", Schema.FieldType.INT64) - .addNullableField("cs_catalog_page_sk", Schema.FieldType.INT64) - .addNullableField("cs_ship_mode_sk", Schema.FieldType.INT64) - .addNullableField("cs_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("cs_item_sk", Schema.FieldType.INT64) - .addNullableField("cs_promo_sk", Schema.FieldType.INT64) - .addNullableField("cs_order_number", Schema.FieldType.INT64) - .addNullableField("cs_quantity", Schema.FieldType.INT64) - .addNullableField("cs_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_list_price", Schema.FieldType.FLOAT) - .addNullableField("cs_sales_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("cs_ext_ship_cost", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_ship", Schema.FieldType.FLOAT) - .addNullableField("cs_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) - .addNullableField("cs_net_profit", Schema.FieldType.FLOAT) - .build(); - - public static final Schema ORDER_SCHEMA = - Schema.builder() - .addInt64Field("o_orderkey") - .addInt64Field("o_custkey") - .addStringField("o_orderstatus") - .addFloatField("o_totalprice") - .addStringField("o_orderdate") - .addStringField("o_orderpriority") - .addStringField("o_clerk") - .addInt64Field("o_shippriority") - .addStringField("o_comment") - .build(); - - public static final Schema CUSTOMER_SCHEMA = - Schema.builder() - .addInt64Field("c_custkey") - .addStringField("c_name") - .addStringField("c_address") - .addInt64Field("c_nationkey") - .addStringField("c_phone") - .addFloatField("c_acctbal") - .addStringField("c_mktsegment") - .addStringField("c_comment") - .build(); - - public static final Schema CUSTOMER_DS_SCHEMA = - Schema.builder() - .addNullableField("c_customer_sk", Schema.FieldType.INT64) - .addNullableField("c_customer_id", Schema.FieldType.STRING) - .addNullableField("c_current_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("c_current_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("c_current_addr_sk", Schema.FieldType.INT64) - .addNullableField("c_first_shipto_date_sk", Schema.FieldType.INT64) - .addNullableField("c_first_sales_date_sk", Schema.FieldType.INT64) - .addNullableField("c_salutation", Schema.FieldType.STRING) - .addNullableField("c_first_name", Schema.FieldType.STRING) - .addNullableField("c_last_name", Schema.FieldType.STRING) - .addNullableField("c_preferred_cust_flag", Schema.FieldType.STRING) - .addNullableField("c_birth_day", Schema.FieldType.INT64) - .addNullableField("c_birth_month", Schema.FieldType.INT64) - .addNullableField("c_birth_year", Schema.FieldType.INT64) - .addNullableField("c_birth_country", Schema.FieldType.STRING) - .addNullableField("c_login", Schema.FieldType.STRING) - .addNullableField("c_email_address", Schema.FieldType.STRING) - .addNullableField("c_last_review_date", Schema.FieldType.STRING) - .build(); - - public static final Schema LINEITEM_SCHEMA = - Schema.builder() - .addInt64Field("l_orderkey") - .addInt64Field("l_partkey") - .addInt64Field("l_suppkey") - .addInt64Field("l_linenumber") - .addFloatField("l_quantity") - .addFloatField("l_extendedprice") - .addFloatField("l_discount") - .addFloatField("l_tax") - .addStringField("l_returnflag") - .addStringField("l_linestatus") - .addStringField("l_shipdate") - .addStringField("l_commitdate") - .addStringField("l_receiptdate") - .addStringField("l_shipinstruct") - .addStringField("l_shipmode") - .addStringField("l_comment") - .build(); - - public static final Schema PARTSUPP_SCHEMA = - Schema.builder() - .addInt64Field("ps_partkey") - .addInt64Field("ps_suppkey") - .addInt64Field("ps_availqty") - .addFloatField("ps_supplycost") - .addStringField("ps_comment") - .build(); - - public static final Schema REGION_SCHEMA = - Schema.builder() - .addInt64Field("r_regionkey") - .addStringField("r_name") - .addStringField("r_comment") - .build(); - - public static final Schema SUPPLIER_SCHEMA = - Schema.builder() - .addInt64Field("s_suppkey") - .addStringField("s_name") - .addStringField("s_address") - .addInt64Field("s_nationkey") - .addStringField("s_phone") - .addFloatField("s_acctbal") - .addStringField("s_comment") - .build(); - - public static final Schema PART_SCHEMA = - Schema.builder() - .addInt64Field("p_partkey") - .addStringField("p_name") - .addStringField("p_mfgr") - .addStringField("p_brand") - .addStringField("p_type") - .addInt64Field("p_size") - .addStringField("p_container") - .addFloatField("p_retailprice") - .addStringField("p_comment") - .build(); - - public static final Schema NATION_SCHEMA = - Schema.builder() - .addInt64Field("n_nationkey") - .addStringField("n_name") - .addInt64Field("n_regionkey") - .addStringField("n_comment") - .build(); - - public static final Schema PROMOTION_SCHEMA = - Schema.builder() - .addNullableField("p_promo_sk", Schema.FieldType.INT64) - .addNullableField("p_promo_id", Schema.FieldType.STRING) - .addNullableField("p_start_date_sk", Schema.FieldType.INT64) - .addNullableField("p_end_date_sk", Schema.FieldType.INT64) - .addNullableField("p_item_sk", Schema.FieldType.INT64) - .addNullableField("p_cost", Schema.FieldType.FLOAT) - .addNullableField("p_response_target", Schema.FieldType.INT64) - .addNullableField("p_promo_name", Schema.FieldType.STRING) - .addNullableField("p_channel_dmail", Schema.FieldType.STRING) - .addNullableField("p_channel_email", Schema.FieldType.STRING) - .addNullableField("p_channel_catalog", Schema.FieldType.STRING) - .addNullableField("p_channel_tv", Schema.FieldType.STRING) - .addNullableField("p_channel_radio", Schema.FieldType.STRING) - .addNullableField("p_channel_press", Schema.FieldType.STRING) - .addNullableField("p_channel_event", Schema.FieldType.STRING) - .addNullableField("p_channel_demo", Schema.FieldType.STRING) - .addNullableField("p_channel_details", Schema.FieldType.STRING) - .addNullableField("p_purpose", Schema.FieldType.STRING) - .addNullableField("p_discount_active", Schema.FieldType.STRING) - .build(); - - public static final Schema CUSTOMER_DEMOGRAPHIC_SCHEMA = - Schema.builder() - .addNullableField("cd_demo_sk", Schema.FieldType.INT64) - .addNullableField("cd_gender", Schema.FieldType.STRING) - .addNullableField("cd_marital_status", Schema.FieldType.STRING) - .addNullableField("cd_education_status", Schema.FieldType.STRING) - .addNullableField("cd_purchase_estimate", Schema.FieldType.INT64) - .addNullableField("cd_credit_rating", Schema.FieldType.STRING) - .addNullableField("cd_dep_count", Schema.FieldType.INT64) - .addNullableField("cd_dep_employed_count", Schema.FieldType.INT64) - .addNullableField("cd_dep_college_count", Schema.FieldType.INT64) - .build(); - - public static final Schema WEB_SALES_SCHEMA = - Schema.builder() - .addNullableField("ws_sold_date_sk", Schema.FieldType.INT64) - .addNullableField("ws_sold_time_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_date_sk", Schema.FieldType.INT64) - .addNullableField("ws_item_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_customer_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_bill_addr_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_customer_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_cdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_hdemo_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_addr_sk", Schema.FieldType.INT64) - .addNullableField("ws_web_page_sk", Schema.FieldType.INT64) - .addNullableField("ws_web_site_sk", Schema.FieldType.INT64) - .addNullableField("ws_ship_mode_sk", Schema.FieldType.INT64) - .addNullableField("ws_warehouse_sk", Schema.FieldType.INT64) - .addNullableField("ws_promo_sk", Schema.FieldType.INT64) - .addNullableField("ws_order_number", Schema.FieldType.INT64) - .addNullableField("ws_quantity", Schema.FieldType.INT64) - .addNullableField("ws_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_list_price", Schema.FieldType.FLOAT) - .addNullableField("ws_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_discount_amt", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_sales_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_wholesale_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_list_price", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_coupon_amt", Schema.FieldType.FLOAT) - .addNullableField("ws_ext_ship_cost", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_ship", Schema.FieldType.FLOAT) - .addNullableField("ws_net_paid_inc_ship_tax", Schema.FieldType.FLOAT) - .addNullableField("ws_net_profit", Schema.FieldType.FLOAT) - .build(); -} diff --git a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java b/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java deleted file mode 100644 index 847d896673..0000000000 --- a/examples/beam/src/main/java/org/apache/nemo/examples/beam/tpch/Tpch.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nemo.examples.beam.tpch; - -import com.google.common.collect.ImmutableMap; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.extensions.sql.SqlTransform; -import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTableProvider; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.schemas.Schema; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.*; -import org.apache.commons.csv.CSVFormat; -import org.apache.nemo.compiler.frontend.beam.NemoPipelineOptions; -import org.apache.nemo.compiler.frontend.beam.NemoRunner; -import org.apache.nemo.examples.beam.GenericSourceSink; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.*; -import java.util.stream.Stream; - -import static org.apache.beam.sdk.extensions.sql.impl.schema.BeamTableUtils.beamRow2CsvLine; - -/** - * A simple SQL application. - * (Copied and adapted from https://github.com/apache/beam/pull/6240) - */ -public final class Tpch { - private static final Logger LOG = LoggerFactory.getLogger(Tpch.class.getName()); - - /** - * Private Constructor. - */ - private Tpch() { - } - - /** - * Row csv formats. - */ - static class RowToCsv extends PTransform, PCollection> implements Serializable { - - private final CSVFormat csvFormat; - - RowToCsv(final CSVFormat csvFormat) { - this.csvFormat = csvFormat; - } - - public CSVFormat getCsvFormat() { - return csvFormat; - } - - @Override - public PCollection expand(final PCollection input) { - return input.apply( - "rowToCsv", - MapElements.into(TypeDescriptors.strings()).via(row -> beamRow2CsvLine(row, csvFormat))); - } - } - - private static PCollectionTuple getHTables(final Pipeline pipeline, - final CSVFormat csvFormat, - final String inputDirectory, - final String query) { - final ImmutableMap hSchemas = ImmutableMap.builder() - .put("lineitem", Schemas.LINEITEM_SCHEMA) - .put("customer", Schemas.CUSTOMER_SCHEMA) - .put("orders", Schemas.ORDER_SCHEMA) - - .put("supplier", Schemas.SUPPLIER_SCHEMA) - .put("nation", Schemas.NATION_SCHEMA) - .put("region", Schemas.REGION_SCHEMA) - - .put("part", Schemas.PART_SCHEMA) - .put("partsupp", Schemas.PARTSUPP_SCHEMA) - /* - .put("store_sales", Schemas.STORE_SALES_SCHEMA) - .put("catalog_sales", Schemas.CATALOG_SALES_SCHEMA) - .put("item", Schemas.ITEM_SCHEMA) - .put("date_dim", Schemas.DATE_DIM_SCHEMA) - .put("promotion", Schemas.PROMOTION_SCHEMA) - .put("customer_demographics", Schemas.CUSTOMER_DEMOGRAPHIC_SCHEMA) - .put("web_sales", Schemas.WEB_SALES_SCHEMA) - .put("inventory", Schemas.INVENTORY_SCHEMA) - */ - .build(); - - PCollectionTuple tables = PCollectionTuple.empty(pipeline); - for (final Map.Entry tableSchema : hSchemas.entrySet()) { - final String tableName = tableSchema.getKey(); - - if (query.contains(tableName)) { - LOG.info("HIT: tablename {}", tableName); - - final String filePattern = inputDirectory + tableSchema.getKey() + ".tbl*"; - final PCollection table = GenericSourceSink.read(pipeline, filePattern) - .apply("StringToRow", new TextTableProvider.CsvToRow(tableSchema.getValue(), csvFormat)) - .setCoder(tableSchema.getValue().getRowCoder()) - .setName(tableSchema.getKey()); - tables = tables.and(new TupleTag<>(tableSchema.getKey()), table); - - LOG.info("FilePattern {} / Tables {}", filePattern, tables); - } - } - return tables; - } - - - /** - * @param args arguments. - */ - public static void main(final String[] args) { - final String queryFilePath = args[0]; - final String inputDirectory = args[1]; - final String outputFilePath = args[2]; - - LOG.info("{} / {} / {}", queryFilePath, inputDirectory, outputFilePath); - - final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class); - options.setRunner(NemoRunner.class); - options.setJobName("TPC-H"); - final Pipeline p = Pipeline.create(options); - - final String queryString = getQueryString(queryFilePath); - // Create tables - final CSVFormat csvFormat = CSVFormat.MYSQL - .withDelimiter('|') - .withNullString("") - .withTrailingDelimiter(); - final PCollectionTuple tables = getHTables(p, csvFormat, inputDirectory, queryString); - - // Run the TPC-H query - final PCollection result = tables.apply(SqlTransform.query(queryString)); - - final PCollection resultToWrite = result.apply(MapElements.into(TypeDescriptors.strings()).via( - new SerializableFunction() { - @Override - public String apply(final Row input) { - System.out.println(input.getValues().toString()); - return input.getValues().toString(); - } - })); - - GenericSourceSink.write(resultToWrite, outputFilePath); - - // Then run - p.run(); - } - - private static String getQueryString(final String queryFilePath) { - final List lines = new ArrayList<>(); - try (final Stream stream = Files.lines(Paths.get(queryFilePath))) { - stream.forEach(lines::add); - } catch (IOException e) { - throw new RuntimeException(e); - } - - System.out.println(lines); - - final StringBuilder sb = new StringBuilder(); - lines.forEach(line -> { - sb.append(" "); - sb.append(line); - }); - - final String concate = sb.toString(); - System.out.println(concate); - final String cleanOne = concate.replaceAll("\n", " "); - System.out.println(cleanOne); - final String cleanTwo = cleanOne.replaceAll("\t", " "); - System.out.println(cleanTwo); - - return cleanTwo; - } -} From 0a0b62cce8f657fef333fdf19179531f4b5b547a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 11 Jun 2019 22:39:20 +0900 Subject: [PATCH 221/235] minor edits --- .../optimizer/policy/CrailPolicy.java | 2 +- .../executor/data/BlockManagerWorker.java | 3 +-- .../executor/data/block/CrailFileBlock.java | 23 +++++++++---------- .../executor/data/stores/CrailFileStore.java | 9 ++------ 4 files changed, 15 insertions(+), 22 deletions(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java index b49bcf37e0..2ac0accd21 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java @@ -28,7 +28,7 @@ import org.apache.reef.tang.Injector; /** - * A policy to demonstrate the disaggregation optimization, that uses GlusterFS as file storage. + * A policy to demonstrate the disaggregation optimization, that uses CrailFS as file storage. */ public final class CrailPolicy implements Policy { public static final PolicyBuilder BUILDER = diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 2c4d22bb60..69b8e529d2 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -349,7 +349,7 @@ public void run() { os.writeFileArea(fileArea); } } - } else if(DataStoreProperty.Value.CrailFileStore.equals(blockStore)){ + } else if (DataStoreProperty.Value.CrailFileStore.equals(blockStore)){ final List fileAreas = ((CrailFileBlock) optionalBlock.get()).asFileAreas(keyRange); for (final FileArea fileArea : fileAreas) { try (ByteOutputContext.ByteOutputStream os = outputContext.newOutputStream()){ @@ -518,7 +518,6 @@ private static ControlMessage.BlockStore convertBlockStore( */ private static DataStoreProperty.Value convertBlockStore( final ControlMessage.BlockStore blockStoreType) { - LOG.info("HY: {}", blockStoreType.getClass()); switch (blockStoreType) { case MEMORY: return DataStoreProperty.Value.MemoryStore; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java index 5be4a21beb..f787c38461 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java @@ -52,8 +52,8 @@ public final class CrailFileBlock implements Block { private final Serializer serializer; private final String filePath; private final FileMetadata metadata; - private CrailStore fs = null; - private CrailFile file = null; + private final CrailStore fs; + private CrailFile file; /** * Constructor. @@ -75,21 +75,20 @@ public CrailFileBlock(final String blockId, this.serializer = serializer; this.filePath = filePath; this.metadata = metadata; - try { - this.fs = fs; - this.file = fs.create(filePath, CrailNodeType.DATAFILE, + this.fs = fs; + try { + this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) .get().asFile(); file.syncDir(); - } catch (Exception e1) { - try { - this.fs = fs; - this.file = fs.lookup(filePath).get().asFile(); - } catch (Exception e2) { - LOG.info("{} fetch failed", blockId); - } + } catch (Exception e1) { + try { + this.file = fs.lookup(filePath).get().asFile(); + } catch (Exception e2) { + LOG.info("{} fetch failed", blockId); } } + } /** * Writes the serialized data of this block having a specific key value as a partition to the file diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 67eda4868b..0be50a3efb 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -46,8 +46,8 @@ @ThreadSafe public final class CrailFileStore extends AbstractBlockStore implements RemoteFileStore { private final String fileDirectory; - private CrailConfiguration conf; - private CrailStore fs; + private final CrailConfiguration conf; + private final CrailStore fs; /** * Constructor. @@ -103,8 +103,6 @@ public void writeBlock(final Block block) throws BlockWriteException { * @throws BlockFetchException for any error occurred while trying to fetch a block. */ - @Override - public Optional readBlock(final String blockId) throws BlockFetchException { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); try { @@ -117,12 +115,10 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio } catch (final IOException e) { throw new BlockFetchException(e); } catch (Exception e) { - e.printStackTrace(); throw new BlockFetchException(e); } } } catch (Exception e) { - e.printStackTrace(); throw new BlockFetchException(e); } } @@ -148,7 +144,6 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { } catch (final IOException e) { throw new BlockFetchException(e); } catch (final Exception e) { - e.printStackTrace(); throw new BlockFetchException(e); } } From 4d0eb21210d7f0eb2c1777c8a1d559f35928d20e Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 24 Jun 2019 14:38:03 +0900 Subject: [PATCH 222/235] pom.xml version --- pom.xml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 0386a93c22..e9f8423f06 100644 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,7 @@ under the License. 9.4.10.v20180503 3.6.1 1.7.20 + 1.2-incubating-SNAPSHOT 2.13.0 2.0.0-beta.5 @@ -117,52 +118,52 @@ under the License. org.apache.crail crail-storage - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-storage-narpc - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-storage-rdma - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-storage-nvmf - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-rpc - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-rpc-narpc - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-hdfs - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-namenode - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-rpc - 1.2-incubating-SNAPSHOT + ${crail.version} org.apache.crail crail-client - 1.2-incubating-SNAPSHOT + ${crail.version} From 0e00fb65093c8ec96aa2f582a503fe3fd0932dac Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 24 Jun 2019 14:58:59 +0900 Subject: [PATCH 223/235] BlockManagerWorker reuse code --- .../nemo/runtime/executor/data/BlockManagerWorker.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 69b8e529d2..8cb4abd19b 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -342,20 +342,14 @@ public void run() { final Optional optionalBlock = getBlockStore(blockStore).readBlock(blockId); if (optionalBlock.isPresent()) { if (DataStoreProperty.Value.LocalFileStore.equals(blockStore) - || DataStoreProperty.Value.GlusterFileStore.equals(blockStore)) { + || DataStoreProperty.Value.GlusterFileStore.equals(blockStore) + || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { final List fileAreas = ((FileBlock) optionalBlock.get()).asFileAreas(keyRange); for (final FileArea fileArea : fileAreas) { try (ByteOutputContext.ByteOutputStream os = outputContext.newOutputStream()) { os.writeFileArea(fileArea); } } - } else if (DataStoreProperty.Value.CrailFileStore.equals(blockStore)){ - final List fileAreas = ((CrailFileBlock) optionalBlock.get()).asFileAreas(keyRange); - for (final FileArea fileArea : fileAreas) { - try (ByteOutputContext.ByteOutputStream os = outputContext.newOutputStream()){ - os.writeFileArea(fileArea); - } - } } else { final Iterable partitions = optionalBlock.get().readSerializedPartitions(keyRange); for (final SerializedPartition partition : partitions) { From d1c93040713afc76182596396898ad500eb64bc7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 24 Jun 2019 16:13:26 +0900 Subject: [PATCH 224/235] FileBlock to support Crail file (exception not specified yet) --- .../executor/data/block/FileBlock.java | 100 ++++++++++++++---- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index cb04980860..dc7f973384 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -18,6 +18,7 @@ */ package org.apache.nemo.runtime.executor.data.block; +import org.apache.crail.*; import org.apache.nemo.common.Pair; import org.apache.nemo.common.exception.BlockFetchException; import org.apache.nemo.common.exception.BlockWriteException; @@ -32,6 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; import java.io.*; import java.nio.file.Files; @@ -52,6 +54,10 @@ public final class FileBlock implements Block { private final Serializer serializer; private final String filePath; private final FileMetadata metadata; + private final Boolean crail; + @Nullable + private final CrailStore fs; + private CrailFile file; // not final since fetching the File might fail. /** * Constructor. @@ -70,19 +76,53 @@ public FileBlock(final String blockId, this.serializer = serializer; this.filePath = filePath; this.metadata = metadata; + this.crail = false; + this.fs = null; + this.file = null; + } + + /** + * Constructor for FileBlock that is being written in CrailFileStore. + * + * @param fs the {@link CrailStore} object created from {@link org.apache.crail.conf.CrailConfiguration} + */ + public FileBlock(final String blockId, + final Serializer serializer, + final String filePath, + final FileMetadata metadata, + final CrailStore fs) { + this.id = blockId; + this.nonCommittedPartitionsMap = new HashMap<>(); + this.serializer = serializer; + this.filePath = filePath; + this.metadata = metadata; + this.crail = true; + this.fs = fs; + try { + this.file = fs.create(filePath, CrailNodeType.DATAFILE, CrailStorageClass.DEFAULT, + CrailLocationClass.DEFAULT, true).get().asFile(); + file.syncDir(); + } catch (Exception e1) { + try { + this.file = fs.lookup(filePath).get().asFile(); + } catch (Exception e2) { + LOG.info("{} fetch failed", blockId); + throw new RuntimeException(); + } + } } /** * Writes the serialized data of this block having a specific key value as a partition to the file - * where this block resides. + * where this block resides. Supports both writing either in local file system or CrailFileSystem. * Invariant: This method does not support concurrent write. * * @param serializedPartitions the iterable of the serialized partitions to write. - * @throws IOException if fail to write. + * @throws Exception if fail to write. */ - private void writeToFile(final Iterable> serializedPartitions) - throws IOException { - try (final FileOutputStream fileOutputStream = new FileOutputStream(filePath, true)) { + private void writeToFile(final Iterable> serializedPartitions) throws Exception { + try (OutputStream fileOutputStream = crail + ? file.getBufferedOutputStream(0) : new FileOutputStream(filePath, true)) { for (final SerializedPartition serializedPartition : serializedPartitions) { // Reserve a partition write and get the metadata. metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); @@ -157,7 +197,7 @@ public void writeSerializedPartitions(final Iterable> par } else { try { writeToFile(partitions); - } catch (final IOException e) { + } catch (final Exception e) { throw new BlockWriteException(e); } } @@ -179,7 +219,8 @@ public Iterable> readPartitions(final KeyRange keyRang final List> deserializedPartitions = new ArrayList<>(); try { final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final FileInputStream fileStream = new FileInputStream(filePath)) { + try (InputStream fileStream = crail + ? file.getBufferedInputStream(file.getCapacity()) : new FileInputStream(filePath)) { for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { final K key = partitionMetadata.getKey(); if (keyRange.includes(key)) { @@ -200,8 +241,10 @@ public Iterable> readPartitions(final KeyRange keyRang new ByteArrayInputStream(partitionKeyBytes.right())); deserializedPartitions.add(deserializePartition); } - } catch (final IOException e) { - throw new BlockFetchException(e); + } catch (final IOException e1) { + throw new BlockFetchException(e1); + } catch (final Exception e2) { + //여기에 specific 한.. } return deserializedPartitions; @@ -224,7 +267,8 @@ public Iterable> readSerializedPartitions(final KeyRange // Deserialize the data final List> partitionsInRange = new ArrayList<>(); try { - try (final FileInputStream fileStream = new FileInputStream(filePath)) { + try (InputStream fileStream = crail + ? file.getBufferedInputStream(file.getCapacity()) : new FileInputStream(filePath)) { for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { final K key = partitionmetadata.getKey(); if (keyRange.includes(key)) { @@ -235,15 +279,17 @@ public Iterable> readSerializedPartitions(final KeyRange throw new IOException("The read data size does not match with the partition size."); } partitionsInRange.add(new SerializedPartition<>( - key, serializedData, serializedData.length)); + key, serializedData, serializedData.length)); } else { // Have to skip this partition. skipBytes(fileStream, partitionmetadata.getPartitionSize()); } } } - } catch (final IOException e) { - throw new BlockFetchException(e); + } catch (final IOException e1) { + throw new BlockFetchException(e1); + } catch (final Exception e2) { + // 여기에 specific Exception need to be thrown } return partitionsInRange; @@ -298,8 +344,21 @@ public List asFileAreas(final KeyRange keyRange) throws IOException { */ public void deleteFile() throws IOException { metadata.deleteMetadata(); - if (new File(filePath).exists()) { - Files.delete(Paths.get(filePath)); + if (!crail) { + if (new File(filePath).exists()) { + Files.delete(Paths.get(filePath)); + } + } else { + try { + if (fs.lookup(filePath).get() != null) { + fs.delete(filePath, true); + } + } catch (IOException e) { + e.printStackTrace(); + } catch (Exception e) { + LOG.info("Failed to delete file"); + e.printStackTrace(); + } } } @@ -350,6 +409,9 @@ public synchronized void commitPartitions() throws BlockWriteException { nonCommittedPartitionsMap.clear(); } catch (final IOException e) { throw new BlockWriteException(e); + } catch (Exception e) { + //e.printStackTrace(); + //throw new 여기에 세부적인 exception 던지기 } } @@ -357,15 +419,11 @@ public synchronized void commitPartitions() throws BlockWriteException { * @return the ID of this block. */ @Override - public String getId() { - return id; - } + public String getId() { return id; } /** * @return whether this block is committed or not. */ @Override - public boolean isCommitted() { - return metadata.isCommitted(); - } + public boolean isCommitted() { return metadata.isCommitted(); } } From 5b1a71b32323b4fbf97aed6b48acc194f9559e2a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Mon, 24 Jun 2019 16:32:11 +0900 Subject: [PATCH 225/235] delete CrailFileBlock --- .../executor/data/BlockManagerWorker.java | 1 - .../executor/data/block/CrailFileBlock.java | 400 ------------------ .../executor/data/stores/CrailFileStore.java | 26 +- 3 files changed, 13 insertions(+), 414 deletions(-) delete mode 100644 runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 8cb4abd19b..ce3d1e5a26 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -36,7 +36,6 @@ import org.apache.nemo.runtime.executor.bytetransfer.ByteOutputContext; import org.apache.nemo.runtime.executor.bytetransfer.ByteTransfer; import org.apache.nemo.runtime.executor.data.block.Block; -import org.apache.nemo.runtime.executor.data.block.CrailFileBlock; import org.apache.nemo.runtime.executor.data.block.FileBlock; import org.apache.nemo.runtime.executor.data.partition.NonSerializedPartition; import org.apache.nemo.runtime.executor.data.partition.SerializedPartition; diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java deleted file mode 100644 index f787c38461..0000000000 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/CrailFileBlock.java +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.nemo.runtime.executor.data.block; - -import org.apache.crail.*; -import org.apache.nemo.common.KeyRange; -import org.apache.nemo.common.Pair; -import org.apache.nemo.common.exception.BlockFetchException; -import org.apache.nemo.common.exception.BlockWriteException; -import org.apache.nemo.runtime.executor.data.DataUtil; -import org.apache.nemo.runtime.executor.data.FileArea; -import org.apache.nemo.runtime.executor.data.metadata.FileMetadata; -import org.apache.nemo.runtime.executor.data.metadata.PartitionMetadata; -import org.apache.nemo.runtime.executor.data.partition.NonSerializedPartition; -import org.apache.nemo.runtime.executor.data.partition.Partition; -import org.apache.nemo.runtime.executor.data.partition.SerializedPartition; -import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.concurrent.NotThreadSafe; -import java.io.*; -import java.util.*; - -/** - * This class represents a block which is stored in CrailStore. - * Concurrent read is supported, but concurrent write is not supported. - * - * @param the key type of its partitions. - */ -@NotThreadSafe -public final class CrailFileBlock implements Block { - private static final Logger LOG = LoggerFactory.getLogger(CrailFileBlock.class.getName()); - private final String id; - private final Map> nonCommittedPartitionsMap; - private final Serializer serializer; - private final String filePath; - private final FileMetadata metadata; - private final CrailStore fs; - private CrailFile file; - - /** - * Constructor. - * - * @param blockId the ID of this block. - * @param serializer the {@link Serializer}. - * @param filePath the path of the file that this block will be stored. - * @param metadata the metadata for this block. - * @param fs CrailStore instance of the Crail storage. - */ - - public CrailFileBlock(final String blockId, - final Serializer serializer, - final String filePath, - final FileMetadata metadata, - final CrailStore fs) { - this.id = blockId; - this.nonCommittedPartitionsMap = new HashMap<>(); - this.serializer = serializer; - this.filePath = filePath; - this.metadata = metadata; - this.fs = fs; - try { - this.file = fs.create(filePath, CrailNodeType.DATAFILE, - CrailStorageClass.DEFAULT, CrailLocationClass.DEFAULT, true) - .get().asFile(); - file.syncDir(); - } catch (Exception e1) { - try { - this.file = fs.lookup(filePath).get().asFile(); - } catch (Exception e2) { - LOG.info("{} fetch failed", blockId); - } - } - } - - /** - * Writes the serialized data of this block having a specific key value as a partition to the file - * where this block resides. - * Invariant: This method does not support concurrent write. - * - * @param serializedPartitions the iterable of the serialized partitions to write. - * @throws Exception if fail to write. - */ - private void writeToFile(final Iterable> serializedPartitions) throws Exception { - final CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0); - for (final SerializedPartition serializedPartition : serializedPartitions) { - metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); - fileOutputStream.write(serializedPartition.getData(), 0, serializedPartition.getLength()); - } - fileOutputStream.close(); - } - - /** - * Writes an element to non-committed block. - * Invariant: This should not be invoked after this block is committed. - * Invariant: This method does not support concurrent write. - * - * @param key the key. - * @param element the element to write. - * @throws BlockWriteException for any error occurred while trying to write a block. - */ - @Override - public void write(final K key, - final Object element) throws BlockWriteException { - if (metadata.isCommitted()) { - throw new BlockWriteException(new Throwable("The partition is already committed!")); - } else { - try { - SerializedPartition partition = nonCommittedPartitionsMap.get(key); - if (partition == null) { - partition = new SerializedPartition<>(key, serializer); - nonCommittedPartitionsMap.put(key, partition); - } - partition.write(element); - } catch (final IOException e) { - throw new BlockWriteException(e); - } - } - } - - /** - * Writes {@link NonSerializedPartition}s to this block. - * Invariant: This method does not support concurrent write. - * - * @param partitions the {@link NonSerializedPartition}s to write. - * @throws BlockWriteException for any error occurred while trying to write a block. - */ - @Override - public void writePartitions(final Iterable> partitions) - throws BlockWriteException { - if (metadata.isCommitted()) { - throw new BlockWriteException(new Throwable("The partition is already committed!")); - } else { - try { - final Iterable> convertedPartitions = - DataUtil.convertToSerPartitions(serializer, partitions); - writeSerializedPartitions(convertedPartitions); - } catch (final IOException e) { - throw new BlockWriteException(e); - } - } - } - - /** - * Writes {@link SerializedPartition}s to this block. - * Invariant: This method does not support concurrent write. - * - * @param partitions the {@link SerializedPartition}s to store. - * @throws BlockWriteException for any error occurred while trying to write a block. - */ - @Override - public void writeSerializedPartitions(final Iterable> partitions) - throws BlockWriteException { - if (metadata.isCommitted()) { - throw new BlockWriteException(new Throwable("The partition is already committed!")); - } else { - try { - writeToFile(partitions); - } catch (final IOException e) { - throw new BlockWriteException(e); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - - /** - * Retrieves the partitions of this block from the file in a specific key range and deserializes it. - * - * @param keyRange the key range. - * @return an iterable of {@link NonSerializedPartition}s. - * @throws BlockFetchException for any error occurred while trying to fetch a block. - */ - @Override - public Iterable> readPartitions(final KeyRange keyRange) throws BlockFetchException { - if (!metadata.isCommitted()) { - throw new BlockFetchException(new Throwable("Cannot retrieve elements before a block is committed")); - } else { - // Deserialize the data - final List> deserializedPartitions = new ArrayList<>(); - try { - final List> partitionKeyBytesPairs = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())) { - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { - final K key = partitionMetadata.getKey(); - if (keyRange.includes(key)) { - // The key value of this partition is in the range. - final byte[] partitionBytes = new byte[partitionMetadata.getPartitionSize()]; - fileStream.read(partitionBytes, 0, partitionMetadata.getPartitionSize()); - partitionKeyBytesPairs.add(Pair.of(key, partitionBytes)); - } else { - // Have to skip this partition. - skipBytes(fileStream, partitionMetadata.getPartitionSize()); - } - } - } catch (Exception e) { - e.printStackTrace(); - } - for (final Pair partitionKeyBytes : partitionKeyBytesPairs) { - final NonSerializedPartition deserializePartition = - DataUtil.deserializePartition( - partitionKeyBytes.right().length, serializer, partitionKeyBytes.left(), - new ByteArrayInputStream(partitionKeyBytes.right())); - deserializedPartitions.add(deserializePartition); - } - } catch (final IOException e) { - throw new BlockFetchException(e); - } - - return deserializedPartitions; - } - } - - /** - * Retrieves the {@link SerializedPartition}s in a specific key range. - * Invariant: This should not be invoked before this block is committed. - * - * @param keyRange the key range to retrieve. - * @return an iterable of {@link SerializedPartition}s. - * @throws BlockFetchException for any error occurred while trying to fetch a block. - */ - @Override - public Iterable> readSerializedPartitions(final KeyRange keyRange) throws BlockFetchException { - if (!metadata.isCommitted()) { - throw new BlockFetchException(new Throwable("Cannot retrieve elements before a block is committed")); - } else { - // Deserialize the data - final List> partitionsInRange = new ArrayList<>(); - try (final CrailBufferedInputStream fileStream = file.getBufferedInputStream(file.getCapacity())) { - for (final PartitionMetadata partitionmetadata : metadata.getPartitionMetadataList()) { - final K key = partitionmetadata.getKey(); - if (keyRange.includes(key)) { - // The hash value of this partition is in the range. - final byte[] serializedData = new byte[partitionmetadata.getPartitionSize()]; - final int readBytes = fileStream.read(serializedData); - if (readBytes != serializedData.length) { - throw new IOException("The read data size does not match with the partition size."); - } - partitionsInRange.add(new SerializedPartition<>( - key, serializedData, serializedData.length)); - } else { - // Have to skip this partition. - skipBytes(fileStream, partitionmetadata.getPartitionSize()); - } - } - } catch (final IOException e) { - throw new BlockFetchException(e); - } catch (final Exception e2) { - e2.printStackTrace(); - } - - return partitionsInRange; - } - } - - /** - * Skips some bytes in a input stream. - * - * @param inputStream the stream to skip. - * @param bytesToSkip the number of bytes to skip. - * @throws IOException if fail to skip. - */ - private void skipBytes(final InputStream inputStream, - final long bytesToSkip) throws IOException { - long remainingBytesToSkip = bytesToSkip; - while (remainingBytesToSkip > 0) { - final long skippedBytes = inputStream.skip(bytesToSkip); - remainingBytesToSkip -= skippedBytes; - if (skippedBytes <= 0) { - throw new IOException("The file stream failed to skip to the next block."); - } - } - } - - /** - * Retrieves the list of {@link FileArea}s for the specified {@link KeyRange}. - * - * @param keyRange the key range - * @return list of the file areas - * @throws IOException if failed to open a file channel - */ - public List asFileAreas(final KeyRange keyRange) throws IOException { - if (!metadata.isCommitted()) { - throw new IOException("Cannot retrieve elements before a block is committed"); - } else { - final List fileAreas = new ArrayList<>(); - for (final PartitionMetadata partitionMetadata : metadata.getPartitionMetadataList()) { - if (keyRange.includes(partitionMetadata.getKey())) { - fileAreas.add(new FileArea(filePath, partitionMetadata.getOffset(), partitionMetadata.getPartitionSize())); - } - } - return fileAreas; - } - } - - /** - * Deletes the file that contains this block data. - * This method have to be called after all read is completed (or failed). - * - * @throws IOException if failed to delete. - */ - public void deleteFile() throws IOException { - metadata.deleteMetadata(); - try { - if (fs.lookup(filePath).get() != null) { - fs.delete(filePath, true); - } - } catch (IOException e) { - e.printStackTrace(); - } catch (Exception e) { - LOG.info("Failed to delete file"); - e.printStackTrace(); - } - } - - /** - * Commits this block to prevent further write. - * - * @return the size of each partition. - * @throws BlockWriteException for any error occurred while trying to write a block. - */ - @Override - public synchronized Optional> commit() throws BlockWriteException { - try { - if (!metadata.isCommitted()) { - commitPartitions(); - metadata.commitBlock(); - } - final List> partitionMetadataList = metadata.getPartitionMetadataList(); - final Map partitionSizes = new HashMap<>(partitionMetadataList.size()); - for (final PartitionMetadata partitionMetadata : partitionMetadataList) { - final K key = partitionMetadata.getKey(); - final long partitionSize = partitionMetadata.getPartitionSize(); - if (partitionSizes.containsKey(key)) { - partitionSizes.compute(key, - (existingKey, existingValue) -> existingValue + partitionSize); - } else { - partitionSizes.put(key, partitionSize); - } - } - return Optional.of(partitionSizes); - } catch (final IOException e) { - throw new BlockWriteException(e); - } - } - - /** - * Commits all un-committed partitions. - * The committed partitions will be flushed to the storage. - */ - @Override - public synchronized void commitPartitions() throws BlockWriteException { - final List> partitions = new ArrayList<>(); - try { - for (final Partition partition : nonCommittedPartitionsMap.values()) { - partition.commit(); - partitions.add((SerializedPartition) partition); - } - writeToFile(partitions); - nonCommittedPartitionsMap.clear(); - } catch (final IOException e) { - throw new BlockWriteException(e); - } catch (Exception e) { - e.printStackTrace(); - } - } - - /** - * @return the ID of this block. - */ - @Override - public String getId() { - return id; - } - - /** - * @return whether this block is committed or not. - */ - @Override - public boolean isCommitted() { - return metadata.isCommitted(); - } -} diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 0be50a3efb..092212d02f 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -25,7 +25,7 @@ import org.apache.nemo.common.exception.BlockWriteException; import org.apache.nemo.runtime.executor.data.*; import org.apache.nemo.runtime.executor.data.block.Block; -import org.apache.nemo.runtime.executor.data.block.CrailFileBlock; +import org.apache.nemo.runtime.executor.data.block.FileBlock; import org.apache.nemo.runtime.executor.data.metadata.CrailFileMetadata; import org.apache.nemo.runtime.executor.data.streamchainer.Serializer; import org.apache.reef.tang.annotations.Parameter; @@ -39,8 +39,8 @@ /** * Stores blocks in CrailStore. * Since the data is stored in CrailStore and globally accessed by multiple nodes, - * each read, or deletion for a file needs one instance of {@link CrailFileBlock}. - * When CrailFileBlock is created, it's metadata is maintained in memory until the block is committed. + * each read, or deletion for a file needs one instance of {@link FileBlock}. + * When FileBlock in Crail is created, it's metadata is maintained in memory until the block is committed. * After the block is committed, the metadata is stored in and read from a CrailStore. */ @ThreadSafe @@ -74,7 +74,7 @@ public Block createBlock(final String blockId) { final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final String metaPath = DataUtil.blockIdToMetaFilePath(blockId, fileDirectory); final CrailFileMetadata metadata = CrailFileMetadata.create(metaPath, fs); - return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); + return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } /** @@ -86,9 +86,9 @@ public Block createBlock(final String blockId) { @Override public void writeBlock(final Block block) throws BlockWriteException { - if (!(block instanceof CrailFileBlock)) { + if (!(block instanceof FileBlock)) { throw new BlockWriteException(new Throwable( - this.toString() + " only accept " + CrailFileBlock.class.getName())); + this.toString() + " only accept " + FileBlock.class.getName())); } else if (!block.isCommitted()) { throw new BlockWriteException(new Throwable("The block " + block.getId() + "is not committed yet.")); } @@ -110,7 +110,7 @@ public Optional readBlock(final String blockId) throws BlockFetchExceptio return Optional.empty(); } else { try { - final CrailFileBlock block = getBlockFromFile(blockId); + final FileBlock block = getBlockFromFile(blockId); return Optional.of(block); } catch (final IOException e) { throw new BlockFetchException(e); @@ -135,7 +135,7 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { try { if (fs.lookup(filePath).get() != null) { - final CrailFileBlock block = getBlockFromFile(blockId); + final FileBlock block = getBlockFromFile(blockId); block.deleteFile(); return true; } else { @@ -149,21 +149,21 @@ public boolean deleteBlock(final String blockId) throws BlockFetchException { } /** - * Gets a {@link CrailFileBlock} from the block and it's metadata file. + * Gets a {@link FileBlock} from the block and it's metadata file. * Because the data is stored in CrailStore and globally accessed by multiple nodes, - * each read, or deletion for a file needs one instance of {@link CrailFileBlock}, + * each read, or deletion for a file needs one instance of {@link FileBlock}, * and the temporary block will not be maintained by this executor. * * @param blockId the ID of the block to get. * @param the type of the key of the block. - * @return the {@link CrailFileBlock} gotten. + * @return the {@link FileBlock} gotten. * @throws Exception if fail to get. */ - private CrailFileBlock getBlockFromFile(final String blockId) throws Exception { + private FileBlock getBlockFromFile(final String blockId) throws Exception { final Serializer serializer = getSerializerFromWorker(blockId); final String filePath = DataUtil.blockIdToFilePath(blockId, fileDirectory); final CrailFileMetadata metadata = CrailFileMetadata.open(DataUtil.blockIdToMetaFilePath(blockId, fileDirectory), fs); - return new CrailFileBlock<>(blockId, serializer, filePath, metadata, fs); + return new FileBlock<>(blockId, serializer, filePath, metadata, fs); } } From 29e708f19708dbde3b196d4e90b2b64eec1996d7 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 14:35:18 +0900 Subject: [PATCH 226/235] API update --- .../annotating/CrailEdgeDataStorePass.java | 5 ++--- .../compiler/optimizer/policy/CrailPolicy.java | 14 +++++--------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java index b801b0a9f3..3251ef70e0 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/pass/compiletime/annotating/CrailEdgeDataStorePass.java @@ -18,10 +18,9 @@ */ package org.apache.nemo.compiler.optimizer.pass.compiletime.annotating; +import org.apache.nemo.common.ir.IRDAG; import org.apache.nemo.common.ir.edge.IREdge; import org.apache.nemo.common.ir.edge.executionproperty.DataStoreProperty; -import org.apache.nemo.common.ir.vertex.IRVertex; -import org.apache.nemo.common.dag.DAG; import org.apache.nemo.compiler.optimizer.pass.compiletime.Requires; import java.util.List; @@ -41,7 +40,7 @@ public CrailEdgeDataStorePass() { } @Override - public DAG apply(final DAG dag) { + public IRDAG apply(final IRDAG dag) { dag.getVertices().forEach(vertex -> { // Initialize the DataStore of the DAG with CrailFileStore. final List inEdges = dag.getIncomingEdgesOf(vertex); inEdges.forEach(edge -> diff --git a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java index 2ac0accd21..dc63896514 100644 --- a/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java +++ b/compiler/optimizer/src/main/java/org/apache/nemo/compiler/optimizer/policy/CrailPolicy.java @@ -18,14 +18,10 @@ */ package org.apache.nemo.compiler.optimizer.policy; -import org.apache.nemo.common.dag.DAG; -import org.apache.nemo.common.eventhandler.PubSubEventHandlerWrapper; -import org.apache.nemo.common.ir.edge.IREdge; -import org.apache.nemo.common.ir.vertex.IRVertex; +import org.apache.nemo.common.ir.IRDAG; import org.apache.nemo.compiler.optimizer.pass.compiletime.annotating.*; import org.apache.nemo.compiler.optimizer.pass.compiletime.composite.DefaultCompositePass; -import org.apache.nemo.compiler.optimizer.pass.compiletime.composite.LoopOptimizationCompositePass; -import org.apache.reef.tang.Injector; +import org.apache.nemo.compiler.optimizer.pass.runtime.Message; /** * A policy to demonstrate the disaggregation optimization, that uses CrailFS as file storage. @@ -45,12 +41,12 @@ public CrailPolicy() { } @Override - public DAG runCompileTimeOptimization(final DAG dag, final String dagDirectory) { + public IRDAG runCompileTimeOptimization(final IRDAG dag, final String dagDirectory) { return this.policy.runCompileTimeOptimization(dag, dagDirectory); } @Override - public void registerRunTimeOptimizations(final Injector injector, final PubSubEventHandlerWrapper pubSubWrapper) { - this.policy.registerRunTimeOptimizations(injector, pubSubWrapper); + public IRDAG runRunTimeOptimizations(final IRDAG dag, final Message message) { + return this.policy.runRunTimeOptimizations(dag, message); } } From f154cd77522e1881b6123c487e8a3fd64f96cc1a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 14:40:37 +0900 Subject: [PATCH 227/235] crail dependency --- pom.xml | 100 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/pom.xml b/pom.xml index 22db300349..2bea10d2b4 100644 --- a/pom.xml +++ b/pom.xml @@ -93,6 +93,56 @@ under the License. snappy-java 1.1.1.3 + + org.apache.crail + crail-storage + ${crail.version} + + + org.apache.crail + crail-storage-narpc + ${crail.version} + + + org.apache.crail + crail-storage-rdma + ${crail.version} + + + org.apache.crail + crail-storage-nvmf + ${crail.version} + + + org.apache.crail + crail-rpc + ${crail.version} + + + org.apache.crail + crail-rpc-narpc + ${crail.version} + + + org.apache.crail + crail-hdfs + ${crail.version} + + + org.apache.crail + crail-namenode + ${crail.version} + + + org.apache.crail + crail-rpc + ${crail.version} + + + org.apache.crail + crail-client + ${crail.version} + junit @@ -154,56 +204,6 @@ under the License. 1.6.2 test - - org.apache.crail - crail-storage - ${crail.version} - - - org.apache.crail - crail-storage-narpc - ${crail.version} - - - org.apache.crail - crail-storage-rdma - ${crail.version} - - - org.apache.crail - crail-storage-nvmf - ${crail.version} - - - org.apache.crail - crail-rpc - ${crail.version} - - - org.apache.crail - crail-rpc-narpc - ${crail.version} - - - org.apache.crail - crail-hdfs - ${crail.version} - - - org.apache.crail - crail-namenode - ${crail.version} - - - org.apache.crail - crail-rpc - ${crail.version} - - - org.apache.crail - crail-client - ${crail.version} - From 4cf9c3265a7979cf9eece266f61d30aca0c1247a Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 14:43:55 +0900 Subject: [PATCH 228/235] import edit --- .../driver/src/main/java/org/apache/nemo/driver/NemoDriver.java | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java index a50b78da1d..61abfeaa20 100644 --- a/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java +++ b/runtime/driver/src/main/java/org/apache/nemo/driver/NemoDriver.java @@ -30,7 +30,6 @@ import org.apache.nemo.runtime.common.message.MessageParameters; import org.apache.nemo.runtime.executor.data.stores.CrailFileStore; import org.apache.nemo.runtime.executor.data.stores.RemoteFileStore; -import org.apache.nemo.runtime.master.ClientRPC; import org.apache.nemo.runtime.master.BroadcastManagerMaster; import org.apache.nemo.runtime.master.RuntimeMaster; From 35a66851df7c112d2705e629e520aaeeb7bc6369 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 15:04:13 +0900 Subject: [PATCH 229/235] crail version edit --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2bea10d2b4..600826ab31 100644 --- a/pom.xml +++ b/pom.xml @@ -64,7 +64,7 @@ under the License. 3.25.2 42.2.5 1.7.20 - 1.2-incubating-SNAPSHOT + 1.1-incubating 2.13.0 2.0.0-beta.5 From 707847a84c6068eb699e026dd3fc2ec3887d3bbd Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 15:08:59 +0900 Subject: [PATCH 230/235] checkstyle --- conf/src/main/java/org/apache/nemo/conf/JobConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/src/main/java/org/apache/nemo/conf/JobConf.java b/conf/src/main/java/org/apache/nemo/conf/JobConf.java index 046abc1662..cc09f2d5c0 100644 --- a/conf/src/main/java/org/apache/nemo/conf/JobConf.java +++ b/conf/src/main/java/org/apache/nemo/conf/JobConf.java @@ -90,7 +90,7 @@ public final class CrailVolumeDirectory implements Name { @NamedParameter(doc = "Option for RemoteFileStore", short_name = "remote_option", default_value = "Gluster") public final class RemoteFileStoreOpt implements Name { } - + /** * Specifies the type of the environment the workload runs on. (e.g., transient / large_shuffle) */ From c637f34e096ea757c515b725e9c36d0bcbd1a404 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 15:27:16 +0900 Subject: [PATCH 231/235] checktyle (CrailFileMetaData needs refactoring) --- .../runtime/executor/data/BlockManagerWorker.java | 6 ++++-- .../runtime/executor/data/block/FileBlock.java | 8 ++++++-- .../executor/data/metadata/CrailFileMetadata.java | 14 +++++++------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java index 3a394725a3..d18a5c1d2c 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/BlockManagerWorker.java @@ -268,7 +268,8 @@ public void writeBlock(final Block block, .setBlockId(blockId) .setState(ControlMessage.BlockStateFromExecutor.AVAILABLE); - if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore) || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { + if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore) + || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { blockStateChangedMsgBuilder.setLocation(REMOTE_FILE_STORE); } else { blockStateChangedMsgBuilder.setLocation(executorId); @@ -302,7 +303,8 @@ public void removeBlock(final String blockId, .setBlockId(blockId) .setState(ControlMessage.BlockStateFromExecutor.NOT_AVAILABLE); - if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore) || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { + if (DataStoreProperty.Value.GlusterFileStore.equals(blockStore) + || DataStoreProperty.Value.CrailFileStore.equals(blockStore)) { blockStateChangedMsgBuilder.setLocation(REMOTE_FILE_STORE); } else { blockStateChangedMsgBuilder.setLocation(executorId); diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 41c4d6ca8b..2f1c870f89 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -420,11 +420,15 @@ public synchronized void commitPartitions() throws BlockWriteException { * @return the ID of this block. */ @Override - public String getId() { return id; } + public String getId() { + return id; + } /** * @return whether this block is committed or not. */ @Override - public boolean isCommitted() { return metadata.isCommitted(); } + public boolean isCommitted() { + return metadata.isCommitted(); + } } diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java index 543bc2806a..8832c891f4 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/metadata/CrailFileMetadata.java @@ -110,28 +110,28 @@ public synchronized void commitBlock() throws IOException { * Creates a new block metadata. * * @param metaFilePath the path of the file to write metadata. - * @param fs the CrailStore instance. + * @param cs the CrailStore instance. * @param the key type of the block's partitions. * @return the created block metadata. */ - public static CrailFileMetadata create(final String metaFilePath, final CrailStore fs) { - return new CrailFileMetadata<>(metaFilePath, fs); + public static CrailFileMetadata create(final String metaFilePath, final CrailStore cs) { + return new CrailFileMetadata<>(metaFilePath, cs); } /** * Opens a existing block metadata in file. * * @param metaFilePath the path of the file to write metadata. - * @param fs the CrailStore instance + * @param cs the CrailStore instance * @param the key type of the block's partitions. * @return the created block metadata. * @throws IOException if fail to open. */ public static CrailFileMetadata open(final String metaFilePath, - final CrailStore fs) throws IOException { + final CrailStore cs) throws IOException { final List> partitionMetadataList = new ArrayList<>(); try { - CrailBufferedInputStream dataInputStream = fs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); + CrailBufferedInputStream dataInputStream = cs.lookup(metaFilePath).get().asFile().getBufferedInputStream(0); while (dataInputStream.available() > 0) { final int keyLength = dataInputStream.readInt(); final byte[] desKey = new byte[keyLength]; @@ -149,6 +149,6 @@ public static CrailFileMetadata open(final String me } catch (Exception e) { throw new IOException("Metadata " + metaFilePath + " does not exist!"); } - return new CrailFileMetadata<>(metaFilePath, partitionMetadataList, fs); + return new CrailFileMetadata<>(metaFilePath, partitionMetadataList, cs); } } From c9f8b2c553f453dd68d0bf3c42c1f6c0930a7ad6 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 15:57:24 +0900 Subject: [PATCH 232/235] API back to 1.1 --- .../nemo/runtime/executor/data/stores/CrailFileStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java index 092212d02f..8c39872b00 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/stores/CrailFileStore.java @@ -62,7 +62,7 @@ private CrailFileStore(@Parameter(JobConf.CrailVolumeDirectory.class) final Stri @Parameter(JobConf.JobId.class) final String jobId, final SerializerManager serializerManager) throws Exception { super(serializerManager); - this.conf = CrailConfiguration.createConfigurationFromFile(); + this.conf = new CrailConfiguration(); this.fs = CrailStore.newInstance(conf); this.fileDirectory = volumeDirectory; } From 3ea30cd5fa6c7ab886c0b71e9ffc18dee6736176 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 16:33:44 +0900 Subject: [PATCH 233/235] import --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index e8f9123c6f..e44e0aed5e 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -40,7 +40,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; +import java.io.OutputStream; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; From 1ec909ee2ef4ef4c6ce84d74227cc4fcc5033a33 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 16:44:16 +0900 Subject: [PATCH 234/235] cannot resolve outputstream? --- .../org/apache/nemo/runtime/executor/data/block/FileBlock.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index e44e0aed5e..8956a40ce0 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -40,7 +40,6 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.OutputStream; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; @@ -136,7 +135,7 @@ public FileBlock(final String blockId, */ private void writeToFile(final Iterable> serializedPartitions) throws Exception { if (crail) { - try (OutputStream fileOutputStream = file.getBufferedOutputStream(0)) { + try (CrailBufferedOutputStream fileOutputStream = file.getBufferedOutputStream(0)) { for (final SerializedPartition serializedPartition : serializedPartitions) { // Reserve a partition write and get the metadata. metadata.writePartitionMetadata(serializedPartition.getKey(), serializedPartition.getLength()); From a6ed1d6d44ddef0408c399626572540c791f3035 Mon Sep 17 00:00:00 2001 From: Haeyoon Cho Date: Tue, 25 Jun 2019 16:53:55 +0900 Subject: [PATCH 235/235] cannot resolve outputstream? --- .../apache/nemo/runtime/executor/data/block/FileBlock.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java index 8956a40ce0..48e6b938b3 100644 --- a/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java +++ b/runtime/executor/src/main/java/org/apache/nemo/runtime/executor/data/block/FileBlock.java @@ -37,12 +37,7 @@ import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; +import java.io.*; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files;