Hi,
I am trying to run the following sample code:
from ts.flint import FlintContext
from ts.flint import summarizers
from ts.flint import TimeSeriesDataFrame
from pyspark.sql.functions import from_utc_timestamp, col
flintContext = FlintContext(sqlContext)
df = spark.createDataFrame(
    [('2018-08-20', 1.0), ('2018-08-21', 2.0), ('2018-08-24', 3.0)],
    ['time', 'v']
).withColumn('time', from_utc_timestamp(col('time'), 'UTC'))
# Convert to Flint DataFrame
flint_df = flintContext.read.dataframe(df)
# Use Spark DataFrame functionality
flint_df = flint_df.withColumn('v', flint_df['v'] + 1)
# Use Flint functionality
flint_df = flint_df.summarizeCycles(summarizers.count())
The last command fails with the following error:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 4 times, most recent failure: Lost task 0.3 in stage 10.0 (TID 35, 10.9.37.18, executor 0): java.lang.NoClassDefFoundError: Could not initialize class com.twosigma.flint.rdd.PartitionsIterator$
at com.twosigma.flint.rdd.Conversion$$anonfun$fromRDD$4.apply(Conversion.scala:309)
at com.twosigma.flint.rdd.Conversion$$anonfun$fromRDD$4.apply(Conversion.scala:303)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$summarizeByKey$1.apply(OrderedRDD.scala:545)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$summarizeByKey$1.apply(OrderedRDD.scala:544)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$mapValues$1.apply(OrderedRDD.scala:472)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$mapValues$1.apply(OrderedRDD.scala:472)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:139)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1526)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:503)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2355)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2343)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2342)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2342)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1096)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1096)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1096)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2574)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2510)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:893)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2243)
at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:270)
at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:280)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:80)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:86)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:508)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:55)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectResult(Dataset.scala:2842)
at org.apache.spark.sql.Dataset$$anonfun$collectResult$1.apply(Dataset.scala:2833)
at org.apache.spark.sql.Dataset$$anonfun$collectResult$1.apply(Dataset.scala:2832)
at org.apache.spark.sql.Dataset$$anonfun$56.apply(Dataset.scala:3446)
at org.apache.spark.sql.Dataset$$anonfun$56.apply(Dataset.scala:3441)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:111)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:240)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:97)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:170)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3441)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:2832)
at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:149)
at com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:54)
at com.databricks.backend.daemon.driver.PythonDriverLocal$$anonfun$getResultBufferInternal$1.apply(PythonDriverLocal.scala:828)
at com.databricks.backend.daemon.driver.PythonDriverLocal$$anonfun$getResultBufferInternal$1.apply(PythonDriverLocal.scala:783)
at com.databricks.backend.daemon.driver.PythonDriverLocal.withInterpLock(PythonDriverLocal.scala:728)
at com.databricks.backend.daemon.driver.PythonDriverLocal.getResultBufferInternal(PythonDriverLocal.scala:783)
at com.databricks.backend.daemon.driver.DriverLocal.getResultBuffer(DriverLocal.scala:463)
at com.databricks.backend.daemon.driver.PythonDriverLocal.com$databricks$backend$daemon$driver$PythonDriverLocal$$outputSuccess(PythonDriverLocal.scala:770)
at com.databricks.backend.daemon.driver.PythonDriverLocal$$anonfun$repl$6.apply(PythonDriverLocal.scala:322)
at com.databricks.backend.daemon.driver.PythonDriverLocal$$anonfun$repl$6.apply(PythonDriverLocal.scala:309)
at com.databricks.backend.daemon.driver.PythonDriverLocal.withInterpLock(PythonDriverLocal.scala:728)
at com.databricks.backend.daemon.driver.PythonDriverLocal.repl(PythonDriverLocal.scala:309)
at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$8.apply(DriverLocal.scala:373)
at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$8.apply(DriverLocal.scala:350)
at com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:238)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:233)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:48)
at com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:271)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:48)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:350)
at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:644)
at scala.util.Try$.apply(Try.scala:192)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:639)
at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:485)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:597)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:390)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:337)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:219)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: Could not initialize class com.twosigma.flint.rdd.PartitionsIterator$
at com.twosigma.flint.rdd.Conversion$$anonfun$fromRDD$4.apply(Conversion.scala:309)
at com.twosigma.flint.rdd.Conversion$$anonfun$fromRDD$4.apply(Conversion.scala:303)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$summarizeByKey$1.apply(OrderedRDD.scala:545)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$summarizeByKey$1.apply(OrderedRDD.scala:544)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$mapValues$1.apply(OrderedRDD.scala:472)
at com.twosigma.flint.rdd.OrderedRDD$$anonfun$mapValues$1.apply(OrderedRDD.scala:472)
at com.twosigma.flint.rdd.OrderedRDD.compute(OrderedRDD.scala:169)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:340)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:304)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:139)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1526)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:503)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
I am using Spark 2.4.3, Scala 2.11 and Python 3.6.
Do you know what the reason for this error is? Could it be due to the Scala version?
Thanks in advance!
@gustronchin No, I didn't.
I assume the problem is due to the Scala version. I have Spark 2.4.3, Scala 2.11 and Python 3.6, and as described here in the requirements, Flint needs Scala 2.12.
I don't have the opportunity to change the Scala version in my environment, so I gave up on using Flint because of this issue.
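In case it helps anyone else landing here: a minimal diagnostic sketch for confirming the mismatch from PySpark before attaching the Flint jar (assuming an active SparkSession named spark; the py4j call into scala.util.Properties is my own workaround, not part of Flint's API):

from pyspark.sql import SparkSession

# Reuse the running session (on Databricks, `spark` already exists).
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Spark version of the cluster, e.g. '2.4.3'
print("Spark version:", spark.version)

# Scala version the driver JVM was built with, e.g. 'version 2.11.12'.
# Flint's requirements call for Scala 2.12, so a 'version 2.11.x' result here
# would explain the NoClassDefFoundError when a 2.12 build of Flint is attached.
print("Scala version:", sc._jvm.scala.util.Properties.versionString())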