From fc08ea102ae3d709fc514cedc6621f717e4d5125 Mon Sep 17 00:00:00 2001 From: Edmund Higham Date: Wed, 22 Jan 2025 16:46:53 -0500 Subject: [PATCH] [query] Move `is.hail.check` to test code --- hail/hail/src/is/hail/check/Gen.scala | 28 +- .../src/is/hail/types/physical/PArray.scala | 6 - .../is/hail/types/physical/PBaseStruct.scala | 9 +- .../src/is/hail/types/physical/PDict.scala | 6 - .../src/is/hail/types/physical/PSet.scala | 6 - .../src/is/hail/types/physical/PType.scala | 113 ---- .../src/is/hail/types/virtual/TArray.scala | 4 - .../is/hail/types/virtual/TBaseStruct.scala | 13 - .../src/is/hail/types/virtual/TBinary.scala | 7 +- .../src/is/hail/types/virtual/TBoolean.scala | 6 +- .../src/is/hail/types/virtual/TCall.scala | 5 +- .../src/is/hail/types/virtual/TDict.scala | 4 - .../src/is/hail/types/virtual/TFloat32.scala | 7 +- .../src/is/hail/types/virtual/TFloat64.scala | 6 +- .../src/is/hail/types/virtual/TInt32.scala | 6 +- .../src/is/hail/types/virtual/TInt64.scala | 6 +- .../src/is/hail/types/virtual/TInterval.scala | 6 +- .../src/is/hail/types/virtual/TLocus.scala | 4 - .../src/is/hail/types/virtual/TNDArray.scala | 3 - .../src/is/hail/types/virtual/TRNGState.scala | 3 - .../hail/src/is/hail/types/virtual/TSet.scala | 4 - .../src/is/hail/types/virtual/TStream.scala | 4 - .../src/is/hail/types/virtual/TString.scala | 6 +- .../src/is/hail/types/virtual/TUnion.scala | 5 +- .../src/is/hail/types/virtual/TVariable.scala | 5 +- .../src/is/hail/types/virtual/TVoid.scala | 5 +- .../hail/src/is/hail/types/virtual/Type.scala | 97 --- hail/hail/src/is/hail/utils/Interval.scala | 11 - hail/hail/src/is/hail/utils/package.scala | 8 - hail/hail/src/is/hail/variant/Call.scala | 50 -- hail/hail/src/is/hail/variant/Genotype.scala | 94 --- hail/hail/src/is/hail/variant/Locus.scala | 6 - .../src/is/hail/variant/ReferenceGenome.scala | 28 - .../src/is/hail/variant/VariantMethods.scala | 51 -- .../annotations/StagedConstructorSuite.scala | 11 +- 
.../src/is/hail/annotations/UnsafeSuite.scala | 1 - .../test/src/is/hail/check/Arbitrary.scala | 90 +++ hail/hail/test/src/is/hail/check/Gen.scala | 562 ++++++++++++++++ hail/hail/test/src/is/hail/check/Prop.scala | 155 +++++ .../test/src/is/hail/io/IndexBTreeSuite.scala | 2 - .../src/is/hail/linalg/BlockMatrixSuite.scala | 9 +- .../test/src/is/hail/methods/ExprSuite.scala | 1 - .../is/hail/methods/LocalLDPruneSuite.scala | 1 - .../is/hail/types/physical/GenInstances.scala | 623 ++++++++++++++++++ .../src/is/hail/utils/BinaryHeapSuite.scala | 2 - .../src/is/hail/utils/BitVectorSuite.scala | 3 - .../src/is/hail/variant/GenotypeSuite.scala | 1 - .../hail/variant/ReferenceGenomeSuite.scala | 1 - 48 files changed, 1462 insertions(+), 622 deletions(-) create mode 100644 hail/hail/test/src/is/hail/check/Arbitrary.scala create mode 100644 hail/hail/test/src/is/hail/check/Gen.scala create mode 100644 hail/hail/test/src/is/hail/check/Prop.scala create mode 100644 hail/hail/test/src/is/hail/types/physical/GenInstances.scala diff --git a/hail/hail/src/is/hail/check/Gen.scala b/hail/hail/src/is/hail/check/Gen.scala index ae157bf802a..be6a9f8eb94 100644 --- a/hail/hail/src/is/hail/check/Gen.scala +++ b/hail/hail/src/is/hail/check/Gen.scala @@ -537,36 +537,26 @@ object Gen { def sized[T](f: (Int) => Gen[T]): Gen[T] = Gen((p: Parameters) => f(p.size)(p)) - def applyGen[T, S](gf: Gen[(T) => S], gx: Gen[T]): Gen[S] = Gen { p => + def applyGen[T, S](gf: Gen[T => S], gx: Gen[T]): Gen[S] = Gen { p => val f = gf(p) val x = gx(p) f(x) } } -class Gen[+T](val gen: (Parameters) => T) extends AnyVal { - - def apply(p: Parameters): T = gen(p) +class Gen[+T](val apply: Parameters => T) extends AnyVal { def sample(): T = apply(Parameters.default) - def map[U](f: (T) => U): Gen[U] = Gen(p => f(apply(p))) + def map[U](f: T => U): Gen[U] = Gen(p => f(apply(p))) - def flatMap[U](f: (T) => Gen[U]): Gen[U] = Gen(p => f(apply(p))(p)) + def flatMap[U](f: T => Gen[U]): Gen[U] = Gen(p => f(apply(p))(p)) 
def resize(newSize: Int): Gen[T] = Gen((p: Parameters) => apply(p.copy(size = newSize))) + + def withFilter(f: T => Boolean): Gen[T] = + Gen((p: Parameters) => Stream.continually(apply(p)).dropWhile(!f(_)).head) - // FIXME should be non-strict - def withFilter(f: (T) => Boolean): Gen[T] = Gen { (p: Parameters) => - var x = apply(p) - var i = 0 - while (!f(x)) { - assert(i < 100) - x = apply(p) - i += 1 - } - x - } - - def filter(f: (T) => Boolean): Gen[T] = withFilter(f) + def filter(f: T => Boolean): Gen[T] = + withFilter(f) } diff --git a/hail/hail/src/is/hail/types/physical/PArray.scala b/hail/hail/src/is/hail/types/physical/PArray.scala index 8ec966eb8c1..a5ed89bbdce 100644 --- a/hail/hail/src/is/hail/types/physical/PArray.scala +++ b/hail/hail/src/is/hail/types/physical/PArray.scala @@ -1,8 +1,5 @@ package is.hail.types.physical -import is.hail.annotations.Annotation -import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.types.virtual.TArray trait PArrayIterator { @@ -17,7 +14,4 @@ abstract class PArray extends PArrayBackedContainer { final protected[physical] val elementRequired = elementType.required def elementIterator(aoff: Long, length: Int): PArrayIterator - - override def genNonmissingValue(sm: HailStateManager): Gen[IndexedSeq[Annotation]] = - Gen.buildableOf[Array](elementType.genValue(sm)).map(x => x: IndexedSeq[Annotation]) } diff --git a/hail/hail/src/is/hail/types/physical/PBaseStruct.scala b/hail/hail/src/is/hail/types/physical/PBaseStruct.scala index 2a6c1e7ca23..fddf0b248f4 100644 --- a/hail/hail/src/is/hail/types/physical/PBaseStruct.scala +++ b/hail/hail/src/is/hail/types/physical/PBaseStruct.scala @@ -1,9 +1,8 @@ package is.hail.types.physical import is.hail.annotations._ -import is.hail.asm4s.{Code, _} +import is.hail.asm4s._ import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical.stypes.interfaces.SBaseStructValue import is.hail.utils._ @@
-144,10 +143,4 @@ abstract class PBaseStruct extends PType { override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructValue override lazy val containsPointers: Boolean = types.exists(_.containsPointers) - - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - if (types.isEmpty) { - Gen.const(Annotation.empty) - } else - Gen.uniformSequence(types.map(t => t.genValue(sm))).map(a => Annotation(a: _*)) } diff --git a/hail/hail/src/is/hail/types/physical/PDict.scala b/hail/hail/src/is/hail/types/physical/PDict.scala index 246e924bf68..c6cc0996ffd 100644 --- a/hail/hail/src/is/hail/types/physical/PDict.scala +++ b/hail/hail/src/is/hail/types/physical/PDict.scala @@ -1,8 +1,5 @@ package is.hail.types.physical -import is.hail.annotations._ -import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.types.physical.stypes.interfaces.SContainer import is.hail.types.virtual.TDict @@ -15,7 +12,4 @@ abstract class PDict extends PContainer { def sType: SContainer def elementType: PStruct - - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Gen.buildableOf2[Map](Gen.zip(keyType.genValue(sm), valueType.genValue(sm))) } diff --git a/hail/hail/src/is/hail/types/physical/PSet.scala b/hail/hail/src/is/hail/types/physical/PSet.scala index 810bae84829..e6109bc58f8 100644 --- a/hail/hail/src/is/hail/types/physical/PSet.scala +++ b/hail/hail/src/is/hail/types/physical/PSet.scala @@ -1,13 +1,7 @@ package is.hail.types.physical -import is.hail.annotations._ -import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.types.virtual.TSet abstract class PSet extends PContainer { lazy val virtualType: TSet = TSet(elementType.virtualType) - - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Gen.buildableOf[Set](elementType.genValue(sm)) } diff --git a/hail/hail/src/is/hail/types/physical/PType.scala b/hail/hail/src/is/hail/types/physical/PType.scala index 
533766fad94..73bb1ba1fd2 100644 --- a/hail/hail/src/is/hail/types/physical/PType.scala +++ b/hail/hail/src/is/hail/types/physical/PType.scala @@ -3,14 +3,12 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.{ExecuteContext, HailStateManager} -import is.hail.check.{Arbitrary, Gen} import is.hail.expr.ir._ import is.hail.types.{tcoerce, Requiredness} import is.hail.types.physical.stypes.{SType, SValue} import is.hail.types.physical.stypes.concrete.SRNGState import is.hail.types.virtual._ import is.hail.utils._ -import is.hail.variant.ReferenceGenome import org.apache.spark.sql.Row import org.json4s.CustomSerializer @@ -31,108 +29,6 @@ class PStructSerializer extends CustomSerializer[PStruct](format => ) object PType { - def genScalar(required: Boolean): Gen[PType] = - Gen.oneOf( - PBoolean(required), - PInt32(required), - PInt64(required), - PFloat32(required), - PFloat64(required), - PCanonicalString(required), - PCanonicalCall(required), - ) - - val genOptionalScalar: Gen[PType] = genScalar(false) - - val genRequiredScalar: Gen[PType] = genScalar(true) - - def genComplexType(required: Boolean): Gen[PType] = { - val rgDependents = ReferenceGenome.hailReferences.toArray.map(PCanonicalLocus(_, required)) - val others = Array(PCanonicalCall(required)) - Gen.oneOfSeq(rgDependents ++ others) - } - - def genFields(required: Boolean, genFieldType: Gen[PType]): Gen[Array[PField]] = { - Gen.buildableOf[Array]( - Gen.zip(Gen.identifier, genFieldType) - ) - .filter(fields => fields.map(_._1).areDistinct()) - .map(fields => - fields - .iterator - .zipWithIndex - .map { case ((k, t), i) => PField(k, t, i) } - .toArray - ) - } - - def preGenStruct(required: Boolean, genFieldType: Gen[PType]): Gen[PStruct] = - for (fields <- genFields(required, genFieldType)) yield PCanonicalStruct(fields, required) - - def preGenTuple(required: Boolean, genFieldType: Gen[PType]): Gen[PTuple] = - for (fields <- genFields(required, 
genFieldType)) - yield PCanonicalTuple(required, fields.map(_.typ): _*) - - private val defaultRequiredGenRatio = 0.2 - - def genStruct: Gen[PStruct] = Gen.coin(defaultRequiredGenRatio).flatMap(preGenStruct(_, genArb)) - - val genOptionalStruct: Gen[PType] = preGenStruct(required = false, genArb) - - val genRequiredStruct: Gen[PType] = preGenStruct(required = true, genArb) - - val genInsertableStruct: Gen[PStruct] = Gen.coin(defaultRequiredGenRatio).flatMap(required => - if (required) - preGenStruct(required = true, genArb) - else - preGenStruct(required = false, genOptional) - ) - - def genSized(size: Int, required: Boolean, genPStruct: Gen[PStruct]): Gen[PType] = - if (size < 1) - Gen.const(PCanonicalStruct.empty(required)) - else if (size < 2) - genScalar(required) - else { - Gen.frequency( - (4, genScalar(required)), - (1, genComplexType(required)), - ( - 1, - genArb.map { - PCanonicalArray(_) - }, - ), - ( - 1, - genArb.map { - PCanonicalSet(_) - }, - ), - ( - 1, - genArb.map { - PCanonicalInterval(_) - }, - ), - (1, preGenTuple(required, genArb)), - (1, Gen.zip(genRequired, genArb).map { case (k, v) => PCanonicalDict(k, v) }), - (1, genPStruct.resize(size)), - ) - } - - def preGenArb(required: Boolean, genStruct: Gen[PStruct] = genStruct): Gen[PType] = - Gen.sized(genSized(_, required, genStruct)) - - def genArb: Gen[PType] = Gen.coin(0.2).flatMap(preGenArb(_)) - - val genOptional: Gen[PType] = preGenArb(required = false) - - val genRequired: Gen[PType] = preGenArb(required = true) - - val genInsertable: Gen[PStruct] = genInsertableStruct - - implicit def arbType = Arbitrary(genArb) def canonical(t: Type, required: Boolean, innerRequired: Boolean): PType = { t match { @@ -397,15 +293,6 @@ object PType { } abstract class PType extends Serializable with Requiredness { - self => - - def genValue(sm: HailStateManager): Gen[Annotation] = - if (required) genNonmissingValue(sm) - else Gen.nextCoin(0.05).flatMap(isEmpty => - if (isEmpty) Gen.const(null) else 
genNonmissingValue(sm) - ) - - def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = virtualType.genNonmissingValue(sm) def virtualType: Type diff --git a/hail/hail/src/is/hail/types/virtual/TArray.scala b/hail/hail/src/is/hail/types/virtual/TArray.scala index d97d523b43b..95e2d6a0dcf 100644 --- a/hail/hail/src/is/hail/types/virtual/TArray.scala +++ b/hail/hail/src/is/hail/types/virtual/TArray.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, ExtendedOrdering} import is.hail.backend.HailStateManager -import is.hail.check.Gen import scala.reflect.{classTag, ClassTag} @@ -45,9 +44,6 @@ final case class TArray(elementType: Type) extends TContainer { override def str(a: Annotation): String = JsonMethods.compact(export(a)) - override def genNonmissingValue(sm: HailStateManager): Gen[IndexedSeq[Annotation]] = - Gen.buildableOf[Array](elementType.genValue(sm)).map(x => x: IndexedSeq[Annotation]) - def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = ExtendedOrdering.iterableOrdering(elementType.ordering(sm), missingEqual) diff --git a/hail/hail/src/is/hail/types/virtual/TBaseStruct.scala b/hail/hail/src/is/hail/types/virtual/TBaseStruct.scala index 95a0e69f6b7..1811254d2fb 100644 --- a/hail/hail/src/is/hail/types/virtual/TBaseStruct.scala +++ b/hail/hail/src/is/hail/types/virtual/TBaseStruct.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.utils._ import scala.reflect.{classTag, ClassTag} @@ -97,18 +96,6 @@ abstract class TBaseStruct extends Type { override def str(a: Annotation): String = JsonMethods.compact(export(a)) - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = { - if (types.isEmpty) { - Gen.const(Annotation.empty) - } else - Gen.size.flatMap(fuel => - if (types.length > fuel) - Gen.uniformSequence(types.map(t => Gen.const(null))).map(a => 
Annotation(a: _*)) - else - Gen.uniformSequence(types.map(t => t.genValue(sm))).map(a => Annotation(a: _*)) - ) - } - override def valuesSimilar(a1: Annotation, a2: Annotation, tolerance: Double, absolute: Boolean) : Boolean = a1 == a2 || (a1 != null && a2 != null diff --git a/hail/hail/src/is/hail/types/virtual/TBinary.scala b/hail/hail/src/is/hail/types/virtual/TBinary.scala index 804da2e06c0..c0c71e4aed4 100644 --- a/hail/hail/src/is/hail/types/virtual/TBinary.scala +++ b/hail/hail/src/is/hail/types/virtual/TBinary.scala @@ -2,19 +2,14 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TBinary extends Type { def _toPretty = "Binary" def _typeCheck(a: Any): Boolean = a.isInstanceOf[Array[Byte]] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Gen.buildableOf(arbitrary[Byte]) - override def scalaClassTag: ClassTag[Array[Byte]] = classTag[Array[Byte]] def mkOrdering(sm: HailStateManager, _missingEqual: Boolean = true): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TBoolean.scala b/hail/hail/src/is/hail/types/virtual/TBoolean.scala index 5fc1e898c62..366083c388f 100644 --- a/hail/hail/src/is/hail/types/virtual/TBoolean.scala +++ b/hail/hail/src/is/hail/types/virtual/TBoolean.scala @@ -2,10 +2,8 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TBoolean extends Type { def _toPretty = "Boolean" @@ -19,8 +17,6 @@ case object TBoolean extends Type { def parse(s: String): Annotation = s.toBoolean - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[Boolean] - override def scalaClassTag: ClassTag[java.lang.Boolean] = 
classTag[java.lang.Boolean] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TCall.scala b/hail/hail/src/is/hail/types/virtual/TCall.scala index 1db13fa683b..2cf2232b240 100644 --- a/hail/hail/src/is/hail/types/virtual/TCall.scala +++ b/hail/hail/src/is/hail/types/virtual/TCall.scala @@ -2,10 +2,9 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.variant.Call -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TCall extends Type { def _toPretty = "Call" @@ -17,8 +16,6 @@ case object TCall extends Type { def _typeCheck(a: Any): Boolean = a.isInstanceOf[Int] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = Call.genNonmissingValue - override def scalaClassTag: ClassTag[java.lang.Integer] = classTag[java.lang.Integer] override def str(a: Annotation): String = diff --git a/hail/hail/src/is/hail/types/virtual/TDict.scala b/hail/hail/src/is/hail/types/virtual/TDict.scala index 5de2f80d936..950608d0b37 100644 --- a/hail/hail/src/is/hail/types/virtual/TDict.scala +++ b/hail/hail/src/is/hail/types/virtual/TDict.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, ExtendedOrdering} import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.utils._ import scala.reflect.{classTag, ClassTag} @@ -61,9 +60,6 @@ final case class TDict(keyType: Type, valueType: Type) extends TContainer { override def str(a: Annotation): String = JsonMethods.compact(export(a)) - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Gen.buildableOf2[Map](Gen.zip(keyType.genValue(sm), valueType.genValue(sm))) - override def valuesSimilar(a1: Annotation, a2: Annotation, tolerance: Double, absolute: Boolean) : Boolean = a1 == a2 || (a1 != null && a2 != null && diff --git 
a/hail/hail/src/is/hail/types/virtual/TFloat32.scala b/hail/hail/src/is/hail/types/virtual/TFloat32.scala index 250cbfea535..4329a67d627 100644 --- a/hail/hail/src/is/hail/types/virtual/TFloat32.scala +++ b/hail/hail/src/is/hail/types/virtual/TFloat32.scala @@ -2,11 +2,9 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen import is.hail.utils._ -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TFloat32 extends TNumeric { def _toPretty = "Float32" @@ -21,9 +19,6 @@ case object TFloat32 extends TNumeric { override def str(a: Annotation): String = if (a == null) "NA" else "%.5e".format(a.asInstanceOf[Float]) - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - arbitrary[Double].map(_.toFloat) - override def valuesSimilar(a1: Annotation, a2: Annotation, tolerance: Double, absolute: Boolean) : Boolean = a1 == a2 || (a1 != null && a2 != null && { diff --git a/hail/hail/src/is/hail/types/virtual/TFloat64.scala b/hail/hail/src/is/hail/types/virtual/TFloat64.scala index 3af52119ce6..9c7849c7400 100644 --- a/hail/hail/src/is/hail/types/virtual/TFloat64.scala +++ b/hail/hail/src/is/hail/types/virtual/TFloat64.scala @@ -2,11 +2,9 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen import is.hail.utils._ -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TFloat64 extends TNumeric { override def _toPretty = "Float64" @@ -21,8 +19,6 @@ case object TFloat64 extends TNumeric { override def str(a: Annotation): String = if (a == null) "NA" else "%.5e".format(a.asInstanceOf[Double]) - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[Double] - override def valuesSimilar(a1: Annotation, a2: Annotation, tolerance: Double, absolute: Boolean) : Boolean = a1 == a2 || (a1 != 
null && a2 != null && { diff --git a/hail/hail/src/is/hail/types/virtual/TInt32.scala b/hail/hail/src/is/hail/types/virtual/TInt32.scala index 0836c64ad1c..4cc6e6bfb2c 100644 --- a/hail/hail/src/is/hail/types/virtual/TInt32.scala +++ b/hail/hail/src/is/hail/types/virtual/TInt32.scala @@ -2,10 +2,8 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TInt32 extends TIntegral { def _toPretty = "Int32" @@ -15,8 +13,6 @@ case object TInt32 extends TIntegral { def _typeCheck(a: Any): Boolean = a.isInstanceOf[Int] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[Int] - override def scalaClassTag: ClassTag[java.lang.Integer] = classTag[java.lang.Integer] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TInt64.scala b/hail/hail/src/is/hail/types/virtual/TInt64.scala index 8ae68c39f7e..c52c9a5b89b 100644 --- a/hail/hail/src/is/hail/types/virtual/TInt64.scala +++ b/hail/hail/src/is/hail/types/virtual/TInt64.scala @@ -2,10 +2,8 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TInt64 extends TIntegral { def _toPretty = "Int64" @@ -15,8 +13,6 @@ case object TInt64 extends TIntegral { def _typeCheck(a: Any): Boolean = a.isInstanceOf[Long] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[Long] - override def scalaClassTag: ClassTag[java.lang.Long] = classTag[java.lang.Long] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TInterval.scala 
b/hail/hail/src/is/hail/types/virtual/TInterval.scala index bc874bf72a4..9a213cc59cc 100644 --- a/hail/hail/src/is/hail/types/virtual/TInterval.scala +++ b/hail/hail/src/is/hail/types/virtual/TInterval.scala @@ -1,8 +1,7 @@ package is.hail.types.virtual -import is.hail.annotations.{Annotation, ExtendedOrdering} +import is.hail.annotations.ExtendedOrdering import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.utils.{FastSeq, Interval} import scala.reflect.{classTag, ClassTag} @@ -30,9 +29,6 @@ case class TInterval(pointType: Type) extends Type { pointType.typeCheck(i.start) && pointType.typeCheck(i.end) } - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Interval.gen(pointType.ordering(sm), pointType.genValue(sm)) - override def scalaClassTag: ClassTag[Interval] = classTag[Interval] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TLocus.scala b/hail/hail/src/is/hail/types/virtual/TLocus.scala index 2c52dd03de5..396e8e17a15 100644 --- a/hail/hail/src/is/hail/types/virtual/TLocus.scala +++ b/hail/hail/src/is/hail/types/virtual/TLocus.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check._ import is.hail.utils._ import is.hail.variant._ @@ -36,9 +35,6 @@ case class TLocus(rgName: String) extends Type { def _typeCheck(a: Any): Boolean = a.isInstanceOf[Locus] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Locus.gen(sm.referenceGenomes(rgName)) - override def scalaClassTag: ClassTag[Locus] = classTag[Locus] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean = true): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TNDArray.scala b/hail/hail/src/is/hail/types/virtual/TNDArray.scala index 0e53b888904..fa5639b4429 100644 --- a/hail/hail/src/is/hail/types/virtual/TNDArray.scala +++ 
b/hail/hail/src/is/hail/types/virtual/TNDArray.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, ExtendedOrdering, NDArray} import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.expr.{Nat, NatBase} import scala.reflect.{classTag, ClassTag} @@ -117,8 +116,6 @@ final case class TNDArray(elementType: Type, nDimsBase: NatBase) extends Type { case _ => false } - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? - override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = null lazy val shapeType: TTuple = TTuple(Array.fill(nDims)(TInt64): _*) diff --git a/hail/hail/src/is/hail/types/virtual/TRNGState.scala b/hail/hail/src/is/hail/types/virtual/TRNGState.scala index 87f3b9ce160..6215add08db 100644 --- a/hail/hail/src/is/hail/types/virtual/TRNGState.scala +++ b/hail/hail/src/is/hail/types/virtual/TRNGState.scala @@ -1,8 +1,6 @@ package is.hail.types.virtual -import is.hail.annotations.Annotation import is.hail.backend.HailStateManager -import is.hail.check.Gen case object TRNGState extends Type { override def _toPretty = "RNGState" @@ -10,7 +8,6 @@ case object TRNGState extends Type { override def pyString(sb: StringBuilder): Unit = sb.append("rng_state") - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? def _typeCheck(a: Any): Boolean = ??? 
def mkOrdering(sm: HailStateManager, missingEqual: Boolean) diff --git a/hail/hail/src/is/hail/types/virtual/TSet.scala b/hail/hail/src/is/hail/types/virtual/TSet.scala index f36bf1435f5..a122ab3c1d8 100644 --- a/hail/hail/src/is/hail/types/virtual/TSet.scala +++ b/hail/hail/src/is/hail/types/virtual/TSet.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, ExtendedOrdering} import is.hail.backend.HailStateManager -import is.hail.check.Gen import scala.reflect.{classTag, ClassTag} @@ -48,9 +47,6 @@ final case class TSet(elementType: Type) extends TContainer { override def str(a: Annotation): String = JsonMethods.compact(export(a)) - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - Gen.buildableOf[Set](elementType.genValue(sm)) - override def scalaClassTag: ClassTag[Set[AnyRef]] = classTag[Set[AnyRef]] override def valueSubsetter(subtype: Type): Any => Any = { diff --git a/hail/hail/src/is/hail/types/virtual/TStream.scala b/hail/hail/src/is/hail/types/virtual/TStream.scala index 730e329510c..204ebd61552 100644 --- a/hail/hail/src/is/hail/types/virtual/TStream.scala +++ b/hail/hail/src/is/hail/types/virtual/TStream.scala @@ -2,7 +2,6 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, ExtendedOrdering} import is.hail.backend.HailStateManager -import is.hail.check.Gen import scala.reflect.{classTag, ClassTag} @@ -40,9 +39,6 @@ final case class TStream(elementType: Type) extends TIterable { override def isRealizable = false - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = - throw new UnsupportedOperationException("Streams don't have associated annotations.") - override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = throw new UnsupportedOperationException("Stream comparison is currently undefined.") diff --git a/hail/hail/src/is/hail/types/virtual/TString.scala b/hail/hail/src/is/hail/types/virtual/TString.scala index 
c335cba9d4b..58ed4ecd626 100644 --- a/hail/hail/src/is/hail/types/virtual/TString.scala +++ b/hail/hail/src/is/hail/types/virtual/TString.scala @@ -2,10 +2,8 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.Arbitrary._ -import is.hail.check.Gen -import scala.reflect.{ClassTag, _} +import scala.reflect._ case object TString extends Type { def _toPretty = "String" @@ -17,8 +15,6 @@ case object TString extends Type { def _typeCheck(a: Any): Boolean = a.isInstanceOf[String] - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[String] - override def scalaClassTag: ClassTag[String] = classTag[String] override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = diff --git a/hail/hail/src/is/hail/types/virtual/TUnion.scala b/hail/hail/src/is/hail/types/virtual/TUnion.scala index 1f1663efdcb..637e2e7c8ea 100644 --- a/hail/hail/src/is/hail/types/virtual/TUnion.scala +++ b/hail/hail/src/is/hail/types/virtual/TUnion.scala @@ -1,8 +1,7 @@ package is.hail.types.virtual -import is.hail.annotations.{Annotation, ExtendedOrdering} +import is.hail.annotations.ExtendedOrdering import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.expr.ir.IRParser import is.hail.utils._ @@ -125,8 +124,6 @@ final case class TUnion(cases: IndexedSeq[Case]) extends Type { } } - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? - override def scalaClassTag: ClassTag[AnyRef] = ??? override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = ??? 
diff --git a/hail/hail/src/is/hail/types/virtual/TVariable.scala b/hail/hail/src/is/hail/types/virtual/TVariable.scala index 25a6a628f83..bffb3e81735 100644 --- a/hail/hail/src/is/hail/types/virtual/TVariable.scala +++ b/hail/hail/src/is/hail/types/virtual/TVariable.scala @@ -1,8 +1,7 @@ package is.hail.types.virtual -import is.hail.annotations.{Annotation, ExtendedOrdering} +import is.hail.annotations.ExtendedOrdering import is.hail.backend.HailStateManager -import is.hail.check.Gen import is.hail.types.Box import scala.collection.mutable @@ -71,8 +70,6 @@ final case class TVariable(name: String, cond: String = null) extends Type { t } - override def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? - override def scalaClassTag: ClassTag[AnyRef] = throw new RuntimeException("TVariable is not realizable") diff --git a/hail/hail/src/is/hail/types/virtual/TVoid.scala b/hail/hail/src/is/hail/types/virtual/TVoid.scala index 2f1c16e23ec..1c3ed36a2bf 100644 --- a/hail/hail/src/is/hail/types/virtual/TVoid.scala +++ b/hail/hail/src/is/hail/types/virtual/TVoid.scala @@ -1,8 +1,7 @@ package is.hail.types.virtual -import is.hail.annotations.{Annotation, ExtendedOrdering} +import is.hail.annotations.ExtendedOrdering import is.hail.backend.HailStateManager -import is.hail.check.Gen case object TVoid extends Type { override def _toPretty = "Void" @@ -10,8 +9,6 @@ case object TVoid extends Type { override def pyString(sb: StringBuilder): Unit = sb.append("void") - def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? 
- override def mkOrdering(sm: HailStateManager, missingEqual: Boolean): ExtendedOrdering = null override def scalaClassTag: scala.reflect.ClassTag[_ <: AnyRef] = diff --git a/hail/hail/src/is/hail/types/virtual/Type.scala b/hail/hail/src/is/hail/types/virtual/Type.scala index 85e1fc31f2e..0f5adf5e941 100644 --- a/hail/hail/src/is/hail/types/virtual/Type.scala +++ b/hail/hail/src/is/hail/types/virtual/Type.scala @@ -2,12 +2,10 @@ package is.hail.types.virtual import is.hail.annotations._ import is.hail.backend.HailStateManager -import is.hail.check.{Arbitrary, Gen} import is.hail.expr.{JSONAnnotationImpex, SparkAnnotationImpex} import is.hail.expr.ir._ import is.hail.utils import is.hail.utils._ -import is.hail.variant.ReferenceGenome import scala.reflect.ClassTag @@ -22,96 +20,6 @@ class TypeSerializer extends CustomSerializer[Type](_ => ) ) -object Type { - def genScalar(): Gen[Type] = - Gen.oneOf(TBoolean, TInt32, TInt64, TFloat32, - TFloat64, TString, TCall) - - def genComplexType(): Gen[Type] = { - val rgDependents = ReferenceGenome.hailReferences.toArray.map(TLocus(_)) - val others = Array(TCall) - Gen.oneOfSeq(rgDependents ++ others) - } - - def genFields(genFieldType: Gen[Type]): Gen[Array[Field]] = { - Gen.buildableOf[Array]( - Gen.zip(Gen.identifier, genFieldType) - ) - .filter(fields => fields.map(_._1).areDistinct()) - .map(fields => - fields - .iterator - .zipWithIndex - .map { case ((k, t), i) => Field(k, t, i) } - .toArray - ) - } - - def preGenStruct(genFieldType: Gen[Type]): Gen[TStruct] = - for (fields <- genFields(genFieldType)) yield TStruct(fields) - - def preGenTuple(genFieldType: Gen[Type]): Gen[TTuple] = - for (fields <- genFields(genFieldType)) yield TTuple(fields.map(_.typ): _*) - - private val defaultRequiredGenRatio = 0.2 - def genStruct: Gen[TStruct] = Gen.coin(defaultRequiredGenRatio).flatMap(c => preGenStruct(genArb)) - - def genSized(size: Int, genTStruct: Gen[TStruct]): Gen[Type] = - if (size < 1) - Gen.const(TStruct.empty) - else if 
(size < 2) - genScalar() - else { - Gen.frequency( - (4, genScalar()), - (1, genComplexType()), - ( - 1, - genArb.map { - TArray(_) - }, - ), - ( - 1, - genArb.map { - TSet(_) - }, - ), - ( - 1, - genArb.map { - TInterval(_) - }, - ), - (1, preGenTuple(genArb)), - (1, Gen.zip(genRequired, genArb).map { case (k, v) => TDict(k, v) }), - (1, genTStruct.resize(size)), - ) - } - - def preGenArb(genStruct: Gen[TStruct] = genStruct): Gen[Type] = - Gen.sized(genSized(_, genStruct)) - - def genArb: Gen[Type] = preGenArb() - - val genOptional: Gen[Type] = preGenArb() - - val genRequired: Gen[Type] = preGenArb() - - def genWithValue(sm: HailStateManager): Gen[(Type, Annotation)] = for { - s <- Gen.size - // prefer smaller type and bigger values - fraction <- Gen.choose(0.1, 0.3) - x = (fraction * s).toInt - y = s - x - t <- Type.genStruct.resize(x) - v <- t.genValue(sm).resize(y) - } yield (t, v) - - implicit def arbType: Arbitrary[Type] = - Arbitrary(genArb) -} - abstract class Type extends VType with Serializable { def children: IndexedSeq[Type] = FastSeq() @@ -179,11 +87,6 @@ abstract class Type extends VType with Serializable { override def toJSON: JValue = JString(toString) - def genNonmissingValue(sm: HailStateManager): Gen[Annotation] - - def genValue(sm: HailStateManager): Gen[Annotation] = - Gen.nextCoin(0.05).flatMap(isEmpty => if (isEmpty) Gen.const(null) else genNonmissingValue(sm)) - def isRealizable: Boolean = children.forall(_.isRealizable) /* compare values for equality, but compare Float and Double values by the absolute value of their diff --git a/hail/hail/src/is/hail/utils/Interval.scala b/hail/hail/src/is/hail/utils/Interval.scala index 6ffa27745d4..3beaa10b34d 100644 --- a/hail/hail/src/is/hail/utils/Interval.scala +++ b/hail/hail/src/is/hail/utils/Interval.scala @@ -1,7 +1,6 @@ package is.hail.utils import is.hail.annotations._ -import is.hail.check._ import is.hail.types.virtual.TBoolean import org.apache.spark.sql.Row @@ -208,16 +207,6 @@ object 
Interval { IntervalEndpoint(end, if (includesEnd) 1 else -1), ) - def gen[P](pord: ExtendedOrdering, pgen: Gen[P]): Gen[Interval] = - Gen.zip(pgen, pgen, Gen.coin(), Gen.coin()) - .filter { case (x, y, s, e) => pord.compare(x, y) != 0 || (s && e) } - .map { case (x, y, s, e) => - if (pord.compare(x, y) < 0) - Interval(x, y, s, e) - else - Interval(y, x, s, e) - } - def ordering(pord: ExtendedOrdering, startPrimary: Boolean, _missingEqual: Boolean = true) : ExtendedOrdering = new ExtendedOrdering { val missingEqual = _missingEqual diff --git a/hail/hail/src/is/hail/utils/package.scala b/hail/hail/src/is/hail/utils/package.scala index 201cf59aa75..8b6affd9801 100644 --- a/hail/hail/src/is/hail/utils/package.scala +++ b/hail/hail/src/is/hail/utils/package.scala @@ -1,7 +1,6 @@ package is.hail import is.hail.annotations.ExtendedOrdering -import is.hail.check.Gen import is.hail.expr.ir.ByteArrayBuilder import is.hail.io.fs.{FS, FileListEntry} @@ -324,8 +323,6 @@ package object utils def flushDouble(a: Double): Double = if (math.abs(a) < java.lang.Double.MIN_NORMAL) 0.0 else a - def genBase: Gen[Char] = Gen.oneOf('A', 'C', 'T', 'G') - def getPartNumber(fname: String): Int = { val partRegex = """.*/?part-(\d+).*""".r @@ -346,11 +343,6 @@ package object utils } } - // ignore size; atomic, like String - def genDNAString: Gen[String] = Gen.stringOf(genBase) - .resize(12) - .filter(s => !s.isEmpty) - def prettyIdentifier(str: String): String = if (str.matches("""[_a-zA-Z]\w*""")) str diff --git a/hail/hail/src/is/hail/variant/Call.scala b/hail/hail/src/is/hail/variant/Call.scala index 400d9e63559..e542a325bab 100644 --- a/hail/hail/src/is/hail/variant/Call.scala +++ b/hail/hail/src/is/hail/variant/Call.scala @@ -1,6 +1,5 @@ package is.hail.variant -import is.hail.check.Gen import is.hail.expr.Parser import is.hail.utils._ @@ -468,53 +467,4 @@ object Call extends Serializable { } } } - - def check(c: Call, nAlleles: Int): Unit = { - (ploidy(c): @switch) match { - case 0 => - 
case 1 => - val a = alleleByIndex(c, 0) - assert(a >= 0 && a < nAlleles) - case 2 => - val nGenotypes = triangle(nAlleles) - val udtn = - if (isPhased(c)) { - val p = allelePair(c) - unphasedDiploidGtIndex(Call2(AllelePair.j(p), AllelePair.k(p))) - } else - unphasedDiploidGtIndex(c) - assert( - udtn < nGenotypes, - s"Invalid call found '${c.toString}' for number of alleles equal to '$nAlleles'.", - ) - case _ => - alleles(c).foreach(a => assert(a >= 0 && a < nAlleles)) - } - } - - def gen( - nAlleles: Int, - ploidyGen: Gen[Int] = Gen.choose(0, 2), - phasedGen: Gen[Boolean] = Gen.nextCoin(0.5), - ): Gen[Call] = for { - ploidy <- ploidyGen - phased <- phasedGen - alleles <- Gen.buildableOfN[Array](ploidy, Gen.choose(0, nAlleles - 1)) - } yield { - val c = CallN(alleles, phased) - check(c, nAlleles) - c - } - - def genUnphasedDiploid(nAlleles: Int): Gen[Call] = gen(nAlleles, Gen.const(2), Gen.const(false)) - - def genPhasedDiploid(nAlleles: Int): Gen[Call] = gen(nAlleles, Gen.const(2), Gen.const(true)) - - def genNonmissingValue: Gen[Call] = for { - nAlleles <- Gen.choose(2, 5) - c <- gen(nAlleles) - } yield { - check(c, nAlleles) - c - } } diff --git a/hail/hail/src/is/hail/variant/Genotype.scala b/hail/hail/src/is/hail/variant/Genotype.scala index 3acf5989c6c..d59ae0241ad 100644 --- a/hail/hail/src/is/hail/variant/Genotype.scala +++ b/hail/hail/src/is/hail/variant/Genotype.scala @@ -1,9 +1,7 @@ package is.hail.variant import is.hail.annotations.Annotation -import is.hail.check.Gen import is.hail.types.virtual.{TArray, TCall, TInt32, TStruct} -import is.hail.utils._ import org.apache.spark.sql.Row @@ -240,96 +238,4 @@ object Genotype { else diploidGtIndex(i, j) - def genExtremeNonmissing(nAlleles: Int): Gen[Annotation] = { - val m = Int.MaxValue / (nAlleles + 1) - val nGenotypes = triangle(nAlleles) - val gg = for { - c: Option[Call] <- Gen.option(Call.genUnphasedDiploid(nAlleles)) - ad <- Gen.option(Gen.buildableOfN[Array](nAlleles, Gen.choose(0, m))) - dp <- 
Gen.option(Gen.choose(0, m)) - gq <- Gen.option(Gen.choose(0, 10000)) - pl <- Gen.oneOfGen( - Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, m))), - Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, 100))), - ) - } yield { - c.foreach(c => pl.foreach(pla => pla(Call.unphasedDiploidGtIndex(c)) = 0)) - pl.foreach { pla => - val m = pla.min - var i = 0 - while (i < pla.length) { - pla(i) -= m - i += 1 - } - } - val g = Annotation( - c.orNull, - ad.map(a => a: IndexedSeq[Int]).orNull, - dp.map(_ + ad.map(_.sum).getOrElse(0)).orNull, - gq.orNull, - pl.map(a => a: IndexedSeq[Int]).orNull, - ) - g - } - gg - } - - def genExtreme(nAlleles: Int): Gen[Annotation] = - Gen.frequency( - (100, genExtremeNonmissing(nAlleles)), - (1, Gen.const(null)), - ) - - def genRealisticNonmissing(nAlleles: Int): Gen[Annotation] = { - val nGenotypes = triangle(nAlleles) - val gg = for { - callRate <- Gen.choose(0d, 1d) - alleleFrequencies <- - Gen.buildableOfN[Array](nAlleles, Gen.choose(1e-6, 1d)) // avoid divison by 0 - .map { rawWeights => - val sum = rawWeights.sum - rawWeights.map(_ / sum) - } - c <- Gen.option( - Gen.zip(Gen.chooseWithWeights(alleleFrequencies), Gen.chooseWithWeights(alleleFrequencies)) - .map { case (gti, gtj) => Call2(gti, gtj) }, - callRate, - ) - ad <- Gen.option(Gen.buildableOfN[Array](nAlleles, Gen.choose(0, 50))) - dp <- Gen.choose(0, 30).map(d => ad.map(o => o.sum + d)) - pl <- Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, 1000)).map { arr => - c match { - case Some(x) => - arr(Call.unphasedDiploidGtIndex(x)) = 0 - arr - case None => - val min = arr.min - arr.map(_ - min) - } - }) - gq <- Gen.choose(-30, 30).map(i => pl.map(pls => math.max(0, gqFromPL(pls) + i))) - } yield Annotation(c.orNull, ad.map(a => a: IndexedSeq[Int]).orNull, dp.orNull, gq.orNull, pl.map(a => a: IndexedSeq[Int]).orNull) - gg - } - - def genRealistic(nAlleles: Int): Gen[Annotation] = - Gen.frequency( - (100, genRealisticNonmissing(nAlleles)), - (1, 
Gen.const(null)), - ) - - def genGenericCallAndProbabilitiesGenotype(nAlleles: Int): Gen[Annotation] = { - val nGenotypes = triangle(nAlleles) - val gg = for (gp <- Gen.option(Gen.partition(nGenotypes, 32768))) yield { - val c = gp.flatMap(a => Option(uniqueMaxIndex(a))).map(Call2.fromUnphasedDiploidGtIndex(_)) - Row( - c.orNull, - gp.map(gpx => gpx.map(p => p.toDouble / 32768): IndexedSeq[Double]).orNull, - ) - } - Gen.frequency( - (100, gg), - (1, Gen.const(null)), - ) - } } diff --git a/hail/hail/src/is/hail/variant/Locus.scala b/hail/hail/src/is/hail/variant/Locus.scala index da7dc169e7b..0dce76a33c3 100644 --- a/hail/hail/src/is/hail/variant/Locus.scala +++ b/hail/hail/src/is/hail/variant/Locus.scala @@ -1,7 +1,6 @@ package is.hail.variant import is.hail.annotations.Annotation -import is.hail.check.Gen import is.hail.expr.Parser import is.hail.utils._ @@ -34,11 +33,6 @@ object Locus { def fromRow(r: Row): Locus = Locus(r.getAs[String](0), r.getInt(1)) - def gen(rg: ReferenceGenome): Gen[Locus] = for { - (contig, length) <- Contig.gen(rg) - pos <- Gen.choose(1, length) - } yield Locus(contig, pos) - def parse(str: String, rg: ReferenceGenome): Locus = { val elts = str.split(":") val size = elts.length diff --git a/hail/hail/src/is/hail/variant/ReferenceGenome.scala b/hail/hail/src/is/hail/variant/ReferenceGenome.scala index 1aa5a14d526..88e07e04073 100644 --- a/hail/hail/src/is/hail/variant/ReferenceGenome.scala +++ b/hail/hail/src/is/hail/variant/ReferenceGenome.scala @@ -2,7 +2,6 @@ package is.hail.variant import is.hail.annotations.ExtendedOrdering import is.hail.backend.ExecuteContext -import is.hail.check.Gen import is.hail.expr.{ JSONExtractContig, JSONExtractIntervalLocus, JSONExtractReferenceGenome, Parser, } @@ -607,33 +606,6 @@ object ReferenceGenome { Integer.compare(l1.position, l2.position) } - def gen: Gen[ReferenceGenome] = - for { - name <- Gen.identifier.filter(!ReferenceGenome.hailReferences.contains(_)) - nContigs <- Gen.choose(3, 10) - 
contigs <- Gen.distinctBuildableOfN[Array](nContigs, Gen.identifier) - lengths <- Gen.buildableOfN[Array](nContigs, Gen.choose(1000000, 500000000)) - contigsIndex = contigs.zip(lengths).toMap - xContig <- Gen.oneOfSeq(contigs) - parXA <- Gen.choose(0, contigsIndex(xContig)) - parXB <- Gen.choose(0, contigsIndex(xContig)) - yContig <- Gen.oneOfSeq(contigs) if yContig != xContig - parYA <- Gen.choose(0, contigsIndex(yContig)) - parYB <- Gen.choose(0, contigsIndex(yContig)) - mtContig <- Gen.oneOfSeq(contigs) if mtContig != xContig && mtContig != yContig - } yield ReferenceGenome( - name, - contigs, - contigs.zip(lengths).toMap, - Set(xContig), - Set(yContig), - Set(mtContig), - Array( - (Locus(xContig, math.min(parXA, parXB)), Locus(xContig, math.max(parXA, parXB))), - (Locus(yContig, math.min(parYA, parYB)), Locus(yContig, math.max(parYA, parYB))), - ), - ) - def apply( name: String, contigs: Array[String], diff --git a/hail/hail/src/is/hail/variant/VariantMethods.scala b/hail/hail/src/is/hail/variant/VariantMethods.scala index 96f5cc016bb..63cd4d2c792 100644 --- a/hail/hail/src/is/hail/variant/VariantMethods.scala +++ b/hail/hail/src/is/hail/variant/VariantMethods.scala @@ -1,13 +1,7 @@ package is.hail.variant -import is.hail.annotations.Annotation -import is.hail.check.Gen import is.hail.utils._ -object Contig { - def gen(rg: ReferenceGenome): Gen[(String, Int)] = Gen.oneOfSeq(rg.lengths.toSeq) -} - object VariantMethods { def parse(str: String, rg: ReferenceGenome): (Locus, IndexedSeq[String]) = { @@ -70,48 +64,3 @@ object VariantMethods { } } } - -object VariantSubgen { - def random(rg: ReferenceGenome): VariantSubgen = VariantSubgen( - contigGen = Contig.gen(rg), - nAllelesGen = Gen.frequency((5, Gen.const(2)), (1, Gen.choose(2, 10))), - refGen = genDNAString, - altGen = Gen.frequency((10, genDNAString), (1, Gen.const("*"))), - ) - - def plinkCompatible(rg: ReferenceGenome): VariantSubgen = { - val r = random(rg) - val compatible = (1 until 
22).map(_.toString).toSet - r.copy( - contigGen = r.contigGen.filter { case (contig, _) => - compatible.contains(contig) - } - ) - } - - def biallelic(rg: ReferenceGenome): VariantSubgen = random(rg).copy(nAllelesGen = Gen.const(2)) - - def plinkCompatibleBiallelic(rg: ReferenceGenome): VariantSubgen = - plinkCompatible(rg).copy(nAllelesGen = Gen.const(2)) -} - -case class VariantSubgen( - contigGen: Gen[(String, Int)], - nAllelesGen: Gen[Int], - refGen: Gen[String], - altGen: Gen[String], -) { - - def genLocusAlleles: Gen[Annotation] = - for { - (contig, length) <- contigGen - start <- Gen.choose(1, length) - nAlleles <- nAllelesGen - ref <- refGen - altAlleles <- Gen.distinctBuildableOfN[Array]( - nAlleles - 1, - altGen, - ) - .filter(!_.contains(ref)) - } yield Annotation(Locus(contig, start), (ref +: altAlleles).toFastSeq) -} diff --git a/hail/hail/test/src/is/hail/annotations/StagedConstructorSuite.scala b/hail/hail/test/src/is/hail/annotations/StagedConstructorSuite.scala index 8acfb0089c4..3c65bc1042e 100644 --- a/hail/hail/test/src/is/hail/annotations/StagedConstructorSuite.scala +++ b/hail/hail/test/src/is/hail/annotations/StagedConstructorSuite.scala @@ -2,6 +2,7 @@ package is.hail.annotations import is.hail.HailSuite import is.hail.asm4s._ +import is.hail.check.Arbitrary.arbitrary import is.hail.check.{Gen, Prop} import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder, IEmitCode, RequirednessSuite} import is.hail.types.physical._ @@ -10,7 +11,6 @@ import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.SInt32Value import is.hail.types.virtual._ import is.hail.utils._ - import org.apache.spark.sql.Row import org.testng.annotations.Test @@ -482,10 +482,11 @@ class StagedConstructorSuite extends HailSuite { } @Test def testDeepCopy(): Unit = { - val g = Type.genStruct - .flatMap(t => Gen.zip(Gen.const(t), t.genValue(sm))) - .filter { case (_, a) => a != null } - .map { case (t, a) => 
(PType.canonical(t).asInstanceOf[PStruct], a) } + val g = for { + tstruct <- arbitrary[TStruct] + struct <- genVal(tstruct) + if struct != null + } yield (PType.canonical(tstruct), struct) val p = Prop.forAll(g) { case (t, a) => assert(t.virtualType.typeCheck(a)) diff --git a/hail/hail/test/src/is/hail/annotations/UnsafeSuite.scala b/hail/hail/test/src/is/hail/annotations/UnsafeSuite.scala index 81a2a82e3a7..dd5d5b4e1e6 100644 --- a/hail/hail/test/src/is/hail/annotations/UnsafeSuite.scala +++ b/hail/hail/test/src/is/hail/annotations/UnsafeSuite.scala @@ -2,7 +2,6 @@ package is.hail.annotations import is.hail.HailSuite import is.hail.backend.ExecuteContext -import is.hail.check._ import is.hail.io._ import is.hail.rvd.AbstractRVDSpec import is.hail.types.physical._ diff --git a/hail/hail/test/src/is/hail/check/Arbitrary.scala b/hail/hail/test/src/is/hail/check/Arbitrary.scala new file mode 100644 index 00000000000..8d7b8ce68db --- /dev/null +++ b/hail/hail/test/src/is/hail/check/Arbitrary.scala @@ -0,0 +1,90 @@ +package is.hail.check + +import scala.collection.generic.CanBuildFrom +import scala.language.higherKinds + +object Arbitrary { + def apply[T](arbitrary: Gen[T]): Arbitrary[T] = + new Arbitrary(arbitrary) + + implicit def arbBoolean: Arbitrary[Boolean] = new Arbitrary( + Gen.oneOf(true, false) + ) + + implicit def arbByte: Arbitrary[Byte] = new Arbitrary(Gen.oneOfGen( + Gen.oneOf(Byte.MinValue, -1, 0, 1, Byte.MaxValue), + Gen(p => p.rng.getRandomGenerator.nextInt().toByte), + )) + + implicit def arbInt: Arbitrary[Int] = new Arbitrary( + Gen.oneOfGen( + Gen.oneOf(Int.MinValue, -1, 0, 1, Int.MaxValue), + Gen.choose(-100, 100), + Gen(p => p.rng.getRandomGenerator.nextInt()), + ) + ) + + implicit def arbLong: Arbitrary[Long] = new Arbitrary( + Gen.oneOfGen( + Gen.oneOf(Long.MinValue, -1L, 0L, 1L, Long.MaxValue), + Gen.choose(-100, 100), + Gen(p => p.rng.getRandomGenerator.nextLong()), + ) + ) + + implicit def arbFloat: Arbitrary[Float] = new Arbitrary( + 
Gen.oneOfGen( + Gen.oneOf( + Float.MinValue, + -1.0f, + -Float.MinPositiveValue, + 0.0f, + Float.MinPositiveValue, + 1.0f, + Float.MaxValue, + ), + Gen.choose(-100.0f, 100.0f), + Gen(p => p.rng.nextUniform(Float.MinValue, Float.MaxValue, true).toFloat), + ) + ) + + implicit def arbDouble: Arbitrary[Double] = new Arbitrary( + Gen.oneOfGen( + Gen.oneOf( + Double.MinValue, + -1.0, + -Double.MinPositiveValue, + 0.0, + Double.MinPositiveValue, + 1.0, + Double.MaxValue, + ), + Gen.choose(-100.0, 100.0), + Gen(p => p.rng.nextUniform(Double.MinValue, Double.MaxValue, true)), + ) + ) + + implicit def arbString: Arbitrary[String] = new Arbitrary(Gen.frequency( + (1, Gen.const("")), + ( + 10, + Gen { (p: Parameters) => + val s = p.rng.getRandomGenerator.nextInt(12) + val b = new StringBuilder() + for (i <- 0 until s) + b += Gen.randomOneOf(p.rng, Gen.printableChars) + b.result() + }, + ), + )) + + implicit def arbBuildableOf[C[_], T]( + implicit a: Arbitrary[T], + cbf: CanBuildFrom[Nothing, T, C[T]], + ): Arbitrary[C[T]] = + Arbitrary(Gen.buildableOf(a.arbitrary)) + + def arbitrary[T](implicit arb: Arbitrary[T]): Gen[T] = arb.arbitrary +} + +class Arbitrary[T](val arbitrary: Gen[T]) diff --git a/hail/hail/test/src/is/hail/check/Gen.scala b/hail/hail/test/src/is/hail/check/Gen.scala new file mode 100644 index 00000000000..be6a9f8eb94 --- /dev/null +++ b/hail/hail/test/src/is/hail/check/Gen.scala @@ -0,0 +1,562 @@ +package is.hail.check + +import is.hail.check.Arbitrary.arbitrary +import is.hail.utils.roundWithConstantSum + +import scala.collection.generic.CanBuildFrom +import scala.collection.mutable +import scala.language.higherKinds +import scala.math.Numeric.Implicits._ +import scala.reflect.ClassTag + +import breeze.linalg.DenseMatrix +import breeze.storage.Zero +import org.apache.commons.math3.random._ + +object Parameters { + val default = Parameters(new RandomDataGenerator(), 1000, 10) +} + +case class Parameters(rng: RandomDataGenerator, size: Int, count: Int) { + + 
def frequency(pass: Int, outOf: Int): Boolean = { + assert(outOf > 0) + rng.getRandomGenerator.nextInt(outOf) < pass + } +} + +object Gen { + + val nonExtremeDouble: Gen[Double] = oneOfGen( + oneOf(1e30, -1.0, -1e-30, 0.0, 1e-30, 1.0, 1e30), + choose(-100.0, 100.0), + choose(-1e150, 1e150), + ) + + def squareOfAreaAtMostSize: Gen[(Int, Int)] = + nCubeOfVolumeAtMostSize(2).map(x => (x(0), x(1))) + + def nonEmptySquareOfAreaAtMostSize: Gen[(Int, Int)] = + nonEmptyNCubeOfVolumeAtMostSize(2).map(x => (x(0), x(1))) + + def nCubeOfVolumeAtMostSize(n: Int): Gen[Array[Int]] = + Gen((p: Parameters) => nCubeOfVolumeAtMost(p.rng, n, p.size)) + + def nonEmptyNCubeOfVolumeAtMostSize(n: Int): Gen[Array[Int]] = + Gen { (p: Parameters) => + nCubeOfVolumeAtMost(p.rng, n, p.size).map(x => if (x == 0) 1 else x).toArray + } + + def partition[T]( + rng: RandomDataGenerator, + size: T, + parts: Int, + f: (RandomDataGenerator, T) => T, + )(implicit + tn: Numeric[T], + tct: ClassTag[T], + ): Array[T] = { + import tn.mkOrderingOps + assert( + size >= tn.zero, + s"size must be greater than or equal to 0. Found $size. tn.zero=${tn.zero}.", + ) + + if (parts == 0) + return Array() + + val a = Array.fill[T](parts)(tn.zero) + var sizeAvail = size + val nSuccesses = rng.getRandomGenerator.nextInt(parts) + 1 + + for (i <- 0 until nSuccesses - 1) { + val s = if (sizeAvail != tn.zero) f(rng, sizeAvail) else tn.zero + a(i) = s + sizeAvail -= s + } + + a(nSuccesses - 1) = sizeAvail + + assert(a.sum == size) + + rng.nextPermutation(a.length, a.length).map(a) + } + + def partition(rng: RandomDataGenerator, size: Int, parts: Int): Array[Int] = + partition(rng, size, parts, (rng: RandomDataGenerator, avail: Int) => rng.nextInt(0, avail)) + + /** Picks a number of bins, n, from a BetaBinomial(alpha, beta), then takes {@code size} balls and + * places them into n bins according to a dirichlet-multinomial distribution with all alpha_i + * equal to n. 
+ */ + def partitionBetaDirichlet(rng: RandomDataGenerator, size: Int, alpha: Double, beta: Double) + : Array[Int] = + partitionDirichlet(rng, size, sampleBetaBinomial(rng, size, alpha, beta)) + + /** Takes {@code size} balls and places them into {@code parts} bins according to a + * dirichlet-multinomial distribution with alpha_n equal to {@code parts} for all n. The outputs + * of this function tend towards uniformly distributed balls, i.e. vectors close to the center of + * the simplex in {@code parts} dimensions. + */ + def partitionDirichlet(rng: RandomDataGenerator, size: Int, parts: Int): Array[Int] = { + val simplexVector = sampleDirichlet(rng, Array.fill(parts)(parts.toDouble)) + roundWithConstantSum(simplexVector.map((x: Double) => x * size).toArray) + } + + def nCubeOfVolumeAtMost(rng: RandomDataGenerator, n: Int, size: Int, alpha: Int = 1) + : Array[Int] = { + val sizeOfSum = math.log(size) + val simplexVector = sampleDirichlet(rng, Array.fill(n)(alpha.toDouble)) + roundWithConstantSum(simplexVector.map((x: Double) => x * sizeOfSum).toArray) + .map(x => math.exp(x).toInt).toArray + } + + private def sampleDirichlet(rng: RandomDataGenerator, alpha: Array[Double]): Array[Double] = { + val draws = alpha.map(rng.nextGamma(_, 1)) + val sum = draws.sum + draws.map((x: Double) => x / sum).toArray + } + + def partition(parts: Int, sum: Int): Gen[Array[Int]] = + Gen { p => + partition(p.rng, sum, parts, (rng: RandomDataGenerator, avail: Int) => rng.nextInt(0, avail)) + } + + def partition(parts: Int, sum: Long): Gen[Array[Long]] = + Gen { p => + partition( + p.rng, + sum, + parts, + (rng: RandomDataGenerator, avail: Long) => rng.nextLong(0, avail), + ) + } + + def partition(parts: Int, sum: Double): Gen[Array[Double]] = + Gen { p => + partition( + p.rng, + sum, + parts, + (rng: RandomDataGenerator, avail: Double) => rng.nextUniform(0, avail), + ) + } + + def partitionSize(parts: Int): Gen[Array[Int]] = + Gen(p => partitionDirichlet(p.rng, p.size, parts)) + + def 
size: Gen[Int] = Gen(p => p.size) + + val printableChars = (0 to 127).map(_.toChar).filter(!_.isControl).toArray + + val identifierLeadingChars = (0 to 127).map(_.toChar) + .filter(c => c == '_' || c.isLetter) + + val identifierChars = (0 to 127).map(_.toChar) + .filter(c => c == '_' || c.isLetterOrDigit) + + val plinkSafeStartOfIdentifierChars = (0 to 127).map(_.toChar) + .filter(c => c.isLetter) + + val plinkSafeChars = (0 to 127).map(_.toChar) + .filter(c => c.isLetterOrDigit) + + def apply[T](gen: (Parameters) => T): Gen[T] = new Gen[T](gen) + + def const[T](x: T): Gen[T] = Gen((p: Parameters) => x) + + def coin(p: Double = 0.5): Gen[Boolean] = { + require(0.0 < p) + require(p < 1.0) + choose(0.0, 1.0).map(_ <= p) + } + + def oneOfSeq[T](xs: Seq[T]): Gen[T] = { + assert(xs.nonEmpty) + Gen((p: Parameters) => xs(p.rng.getRandomGenerator.nextInt(xs.length))) + } + + def oneOfGen[T](gs: Gen[T]*): Gen[T] = { + assert(gs.nonEmpty) + Gen((p: Parameters) => gs(p.rng.getRandomGenerator.nextInt(gs.length))(p)) + } + + def oneOf[T](xs: T*): Gen[T] = oneOfSeq(xs) + + def choose(min: Int, max: Int): Gen[Int] = { + assert(max >= min) + Gen((p: Parameters) => p.rng.nextInt(min, max)) + } + + def choose(min: Long, max: Long): Gen[Long] = { + assert(max >= min) + Gen((p: Parameters) => p.rng.nextLong(min, max)) + } + + def choose(min: Float, max: Float): Gen[Float] = Gen { (p: Parameters) => + p.rng.nextUniform(min, max, true).toFloat + } + + def choose(min: Double, max: Double): Gen[Double] = Gen { (p: Parameters) => + p.rng.nextUniform(min, max, true) + } + + def gaussian(mu: Double, sigma: Double): Gen[Double] = Gen { (p: Parameters) => + p.rng.nextGaussian(mu, sigma) + } + + def nextBeta(alpha: Double, beta: Double): Gen[Double] = Gen { (p: Parameters) => + p.rng.nextBeta(alpha, beta) + } + + def nextCoin(p: Double) = + choose(0.0, 1.0).map(_ < p) + + private def sampleBetaBinomial(rng: RandomDataGenerator, n: Int, alpha: Double, beta: Double) + : Int = + 
rng.nextBinomial(n, rng.nextBeta(alpha, beta)) + + def nextBetaBinomial(n: Int, alpha: Double, beta: Double): Gen[Int] = Gen { p => + sampleBetaBinomial(p.rng, n, alpha, beta) + } + + def shuffle[T](is: IndexedSeq[T]): Gen[IndexedSeq[T]] = { + Gen { (p: Parameters) => + if (is.isEmpty) + is + else + p.rng.nextPermutation(is.size, is.size).map(is) + } + } + + def chooseWithWeights(weights: Array[Double]): Gen[Int] = + frequency(weights.zipWithIndex.map { case (w, i) => (w, Gen.const(i)) }: _*) + + def frequency[T, U](wxs: (T, Gen[U])*)(implicit ev: scala.math.Numeric[T]): Gen[U] = { + import Numeric.Implicits._ + + assert(wxs.nonEmpty) + + val running = Array.fill[Double](wxs.length)(0d) + for (i <- 1 until wxs.length) { + val w = wxs(i - 1)._1.toDouble + assert(w >= 0d) + running(i) = running(i - 1) + w + } + + val outOf = running.last + wxs.last._1.toDouble + + Gen { (p: Parameters) => + val v = p.rng.getRandomGenerator.nextDouble * outOf.toDouble + val t = running.indexWhere(x => x >= v) - 1 + val j = if (t < 0) running.length - 1 else t + assert(j >= 0 && j < wxs.length) + assert(v >= running(j) + && (j == wxs.length - 1 || v < running(j + 1))) + wxs(j)._2(p) + } + } + + def subset[T](s: Set[T]): Gen[Set[T]] = Gen.parameterized { p => + Gen.choose(0.0, 1.0).map(cutoff => + s.filter(_ => p.rng.getRandomGenerator.nextDouble <= cutoff) + ) + } + + def sequence[C[_], T](gs: Traversable[Gen[T]])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]) + : Gen[C[T]] = + Gen { (p: Parameters) => + val b = cbf() + gs.foreach(g => b += g(p)) + b.result() + } + + def denseMatrix[T: ClassTag: Zero: Arbitrary](): Gen[DenseMatrix[T]] = for { + (l, w) <- Gen.nonEmptySquareOfAreaAtMostSize + m <- denseMatrix(l, w) + } yield m + + def denseMatrix[T: ClassTag: Zero: Arbitrary](n: Int, m: Int): Gen[DenseMatrix[T]] = + denseMatrix[T](n, m, arbitrary[T]) + + def denseMatrix[T: ClassTag: Zero](n: Int, m: Int, g: Gen[T]): Gen[DenseMatrix[T]] = + Gen((p: Parameters) => DenseMatrix.fill[T](n, 
m)(g.resize(p.size / (n * m))(p))) + + def twoMultipliableDenseMatrices[T: ClassTag: Zero: Arbitrary]() + : Gen[(DenseMatrix[T], DenseMatrix[T])] = + twoMultipliableDenseMatrices(arbitrary[T]) + + def twoMultipliableDenseMatrices[T: ClassTag: Zero](g: Gen[T]) + : Gen[(DenseMatrix[T], DenseMatrix[T])] = for { + Array(rows, inner, columns) <- Gen.nonEmptyNCubeOfVolumeAtMostSize(3) + l <- denseMatrix(rows, inner, g) + r <- denseMatrix(inner, columns, g) + } yield (l, r) + + /** In general, for any Traversable type T and any Monad M, we may convert an {@code F[M[T]]} to + * an {@code M[F[T]]} by choosing to perform the actions in the order defined by the traversable. + * With {@code Gen} we must also consider the distribution of size. {@code uniformSequence} + * distributes the size uniformly across all elements of the traversable. + */ + def uniformSequence[C[_], T]( + gs: Traversable[Gen[T]] + )(implicit cbf: CanBuildFrom[Nothing, T, C[T]] + ): Gen[C[T]] = + partitionSize(gs.size).map(resizeMany(gs, _)).flatMap(sequence[C, T]) + + private def resizeMany[T](gs: Traversable[Gen[T]], partition: Array[Int]): Iterable[Gen[T]] = + (gs.toIterable, partition).zipped.map((gen, size) => gen.resize(size)) + + def stringOf[T](g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, String]): Gen[String] = + unsafeBuildableOf(g) + + sealed trait BuildableOf[C[_]] { + def apply[T](g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]): Gen[C[T]] = + unsafeBuildableOf(g) + } + + private object buildableOfInstance extends BuildableOf[Nothing] + + def buildableOf[C[_]] = buildableOfInstance.asInstanceOf[BuildableOf[C]] + + implicit def buildableOfFromElements[C[_], T]( + implicit g: Gen[T], + cbf: CanBuildFrom[Nothing, T, C[T]], + ): Gen[C[T]] = + buildableOf[C](g) + + sealed trait BuildableOf2[C[_, _]] { + def apply[T, U](g: Gen[(T, U)])(implicit cbf: CanBuildFrom[Nothing, (T, U), C[T, U]]) + : Gen[C[T, U]] = + unsafeBuildableOf(g) + } + + private object buildableOf2Instance extends 
BuildableOf2[Nothing] + + def buildableOf2[C[_, _]] = buildableOf2Instance.asInstanceOf[BuildableOf2[C]] + + private val buildableOfAlpha = 3 + private val buildableOfBeta = 6 + + private def unsafeBuildableOf[C, T](g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C]) + : Gen[C] = + Gen { (p: Parameters) => + val b = cbf() + if (p.size == 0) + b.result() + else { + // scale up a bit by log, so that we can spread out a bit more with + // higher sizes + val part = partitionBetaDirichlet( + p.rng, + p.size, + buildableOfAlpha, + buildableOfBeta * math.log(p.size + 0.01), + ) + val s = part.length + for (i <- 0 until s) + b += g(p.copy(size = part(i))) + b.result() + } + } + + sealed trait DistinctBuildableOf[C[_]] { + def apply[T](g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]): Gen[C[T]] = + Gen { (p: Parameters) => + val b = cbf() + if (p.size == 0) + b.result() + else { + // scale up a bit by log, so that we can spread out a bit more with + // higher sizes + val part = partitionBetaDirichlet( + p.rng, + p.size, + buildableOfAlpha, + buildableOfBeta * math.log(p.size + 0.01), + ) + val s = part.length + val t = mutable.Set.empty[T] + for (i <- 0 until s) + t += g(p.copy(size = part(i))) + b ++= t + b.result() + } + } + } + + private object distinctBuildableOfInstance extends DistinctBuildableOf[Nothing] + + def distinctBuildableOf[C[_]] = distinctBuildableOfInstance.asInstanceOf[DistinctBuildableOf[C]] + + /** This function terminates with probability equal to the probability of {@code g} generating + * {@code min} distinct elements in finite time. 
+ */ + sealed trait DistinctBuildableOfAtLeast[C[_]] { + def apply[T](min: Int, g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]): Gen[C[T]] = { + Gen { (p: Parameters) => + val b = cbf() + if (p.size < min) { + throw new RuntimeException( + s"Size (${p.size}) is too small for buildable of size at least $min" + ) + } else if (p.size == 0) + b.result() + else { + // scale up a bit by log, so that we can spread out a bit more with + // higher sizes + val s = min + sampleBetaBinomial( + p.rng, + p.size - min, + buildableOfAlpha, + buildableOfBeta * math.log((p.size - min) + 0.01), + ) + val part = partitionDirichlet(p.rng, p.size, s) + val t = mutable.Set.empty[T] + for (i <- 0 until s) { + var element = g.resize(part(i))(p) + while (t.contains(element)) + element = g.resize(part(i))(p) + t += element + } + b ++= t + b.result() + } + } + } + } + + private object distinctBuildableOfAtLeastInstance extends DistinctBuildableOfAtLeast[Nothing] + + def distinctBuildableOfAtLeast[C[_]] = + distinctBuildableOfAtLeastInstance.asInstanceOf[DistinctBuildableOfAtLeast[C]] + + sealed trait BuildableOfN[C[_]] { + def apply[T](n: Int, g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]): Gen[C[T]] = + Gen { (p: Parameters) => + val part = partitionDirichlet(p.rng, p.size, n) + val b = cbf() + for (i <- 0 until n) + b += g(p.copy(size = part(i))) + b.result() + } + } + + private object buildableOfNInstance extends BuildableOfN[Nothing] + + def buildableOfN[C[_]] = buildableOfNInstance.asInstanceOf[BuildableOfN[C]] + + sealed trait DistinctBuildableOfN[C[_]] { + def apply[T](n: Int, g: Gen[T])(implicit cbf: CanBuildFrom[Nothing, T, C[T]]): Gen[C[T]] = + Gen { (p: Parameters) => + val part = partitionDirichlet(p.rng, p.size, n) + val t: mutable.Set[T] = mutable.Set.empty[T] + var i = 0 + while (i < n) { + t += g(p.copy(size = part(i))) + i = t.size + } + val b = cbf() + b ++= t + b.result() + } + } + + private object distinctBuildableOfNInstance extends 
DistinctBuildableOfN[Nothing] + + def distinctBuildableOfN[C[_]] = + distinctBuildableOfNInstance.asInstanceOf[DistinctBuildableOfN[C]] + + def randomOneOf[T](rng: RandomDataGenerator, is: IndexedSeq[T]): T = { + assert(is.nonEmpty) + is(rng.getRandomGenerator.nextInt(is.length)) + } + + def identifier: Gen[String] = + identifierGen(identifierLeadingChars, identifierChars) + + def plinkSafeIdentifier: Gen[String] = + identifierGen(plinkSafeStartOfIdentifierChars, plinkSafeChars) + + private def identifierGen( + leadingCharacter: IndexedSeq[Char], + trailingCharacters: IndexedSeq[Char], + ): Gen[String] = Gen { p => + val s = 1 + p.rng.getRandomGenerator.nextInt(11) + val b = new StringBuilder() + b += randomOneOf(p.rng, leadingCharacter) + for (_ <- 1 until s) + b += randomOneOf(p.rng, trailingCharacters) + b.result() + } + + def option[T](g: Gen[T], someFraction: Double = 0.8): Gen[Option[T]] = Gen { (p: Parameters) => + if (p.rng.getRandomGenerator.nextDouble < someFraction) + Some(g(p)) + else + None + } + + def nonnegInt: Gen[Int] = Gen(p => p.rng.getRandomGenerator.nextInt() & Int.MaxValue) + + def posInt: Gen[Int] = Gen { (p: Parameters) => + p.rng.getRandomGenerator.nextInt(Int.MaxValue - 1) + 1 + } + + def interestingPosInt: Gen[Int] = oneOfGen( + oneOf(1, 2, Int.MaxValue - 1, Int.MaxValue), + choose(1, 100), + posInt, + ) + + def zip[T1](g1: Gen[T1]): Gen[T1] = g1 + + def zip[T1, T2](g1: Gen[T1], g2: Gen[T2]): Gen[(T1, T2)] = for { + Array(s1, s2) <- partitionSize(2) + x <- g1.resize(s1) + y <- g2.resize(s2) + } yield (x, y) + + def zip[T1, T2, T3](g1: Gen[T1], g2: Gen[T2], g3: Gen[T3]): Gen[(T1, T2, T3)] = for { + Array(s1, s2, s3) <- partitionSize(3) + x <- g1.resize(s1) + y <- g2.resize(s2) + z <- g3.resize(s3) + } yield (x, y, z) + + def zip[T1, T2, T3, T4](g1: Gen[T1], g2: Gen[T2], g3: Gen[T3], g4: Gen[T4]) + : Gen[(T1, T2, T3, T4)] = for { + Array(s1, s2, s3, s4) <- partitionSize(4) + x <- g1.resize(s1) + y <- g2.resize(s2) + z <- g3.resize(s3) + w 
<- g4.resize(s4) + } yield (x, y, z, w) + + def parameterized[T](f: (Parameters => Gen[T])) = Gen(p => f(p)(p)) + + def sized[T](f: (Int) => Gen[T]): Gen[T] = Gen((p: Parameters) => f(p.size)(p)) + + def applyGen[T, S](gf: Gen[T => S], gx: Gen[T]): Gen[S] = Gen { p => + val f = gf(p) + val x = gx(p) + f(x) + } +} + +class Gen[+T](val apply: Parameters => T) extends AnyVal { /* wraps a function from Parameters to a generated T */ + + def sample(): T = apply(Parameters.default) + + def map[U](f: T => U): Gen[U] = Gen(p => f(apply(p))) + + def flatMap[U](f: T => Gen[U]): Gen[U] = Gen(p => f(apply(p))(p)) + + def resize(newSize: Int): Gen[T] = Gen((p: Parameters) => apply(p.copy(size = newSize))) + + def withFilter(f: T => Boolean): Gen[T] = /* redraws until f accepts a sample; does not terminate if f rejects every sample */ + Gen((p: Parameters) => Iterator.continually(apply(p)).dropWhile(x => !f(x)).next()) + + def filter(f: T => Boolean): Gen[T] = /* alias of withFilter, so for-comprehension guards and explicit filters behave the same */ + withFilter(f) +} diff --git a/hail/hail/test/src/is/hail/check/Prop.scala b/hail/hail/test/src/is/hail/check/Prop.scala new file mode 100644 index 00000000000..b5fd2487e25 --- /dev/null +++ b/hail/hail/test/src/is/hail/check/Prop.scala @@ -0,0 +1,155 @@ +package is.hail.check + +import scala.collection.mutable.ArrayBuffer +import scala.util.{Failure, Random, Success, Try} + +import org.apache.commons.math3.random.RandomDataGenerator + +abstract class Prop { + def apply(p: Parameters, name: Option[String] = None): Unit + + def check(): Unit = { + val size = System.getProperty("check.size", "1000").toInt + val count = System.getProperty("check.count", "10").toInt + + println(s"check: size = $size, count = $count") + + val rng = new RandomDataGenerator() + rng.reSeed(Prop.seed) + apply(Parameters(rng, size, count)) + } +} + +class GenProp1[T1](g1: Gen[T1], f: (T1) => Boolean) extends Prop { + override def apply(p: Parameters, name: Option[String]): Unit = { + val prefix = name.map(_ + ": ").getOrElse("") + for (i <- 0 until p.count) { + val v1 = g1(p) + val r = Try(f(v1)) + r match { + case Success(true) => + case Success(false) => + println(s"""!
${prefix}Falsified after $i passed tests.""") + println(s"> ARG_0: $v1") + throw new AssertionError(null) + case Failure(e) => + println(s"""! ${prefix}Error after $i passed tests.""") + println(s"> ARG_0: $v1") + throw new AssertionError(e) + } + } + println(s" + ${prefix}OK, passed ${p.count} tests.") + } +} + +class GenProp2[T1, T2](g1: Gen[T1], g2: Gen[T2], f: (T1, T2) => Boolean) extends Prop { /* checks f on p.count pairs drawn from g1 and g2; a failure report lists both generated arguments */ + override def apply(p: Parameters, name: Option[String]): Unit = { + val prefix = name.map(_ + ": ").getOrElse("") + for (i <- 0 until p.count) { + val v1 = g1(p) + val v2 = g2(p) + val r = Try(f(v1, v2)) + r match { + case Success(true) => + case Success(false) => + println(s"""! ${prefix}Falsified after $i passed tests.""") + println(s"> ARG_0: $v1\n> ARG_1: $v2") + throw new AssertionError(null) + case Failure(e) => + println(s"""! ${prefix}Error after $i passed tests.""") + println(s"> ARG_0: $v1\n> ARG_1: $v2") + throw new AssertionError(e) + } + } + println(s" + ${prefix}OK, passed ${p.count} tests.") + } +} + +class GenProp3[T1, T2, T3](g1: Gen[T1], g2: Gen[T2], g3: Gen[T3], f: (T1, T2, T3) => Boolean) + extends Prop { /* checks f on p.count triples drawn from g1, g2 and g3; a failure report lists all three generated arguments */ + override def apply(p: Parameters, name: Option[String]): Unit = { + val prefix = name.map(_ + ": ").getOrElse("") + for (i <- 0 until p.count) { + val v1 = g1(p) + val v2 = g2(p) + val v3 = g3(p) + val r = Try(f(v1, v2, v3)) + r match { + case Success(true) => + case Success(false) => + println(s"""! ${prefix}Falsified after $i passed tests.""") + println(s"> ARG_0: $v1\n> ARG_1: $v2\n> ARG_2: $v3") + throw new AssertionError(null) + case Failure(e) => + println(s"""! ${prefix}Error after $i passed tests.""") + println(s"> ARG_0: $v1\n> ARG_1: $v2\n> ARG_2: $v3") + throw new AssertionError(e) + } + } + println(s" + ${prefix}OK, passed ${p.count} tests.") + } +} + +class Properties(val name: String) extends Prop { + val properties = ArrayBuffer.empty[(String, Prop)] + + class PropertySpecifier { + def update(propName: String, prop: Prop): Unit = + properties += (name + "."
+ propName) -> prop + } + + lazy val property = new PropertySpecifier + + override def apply(p: Parameters, prefix: Option[String]): Unit = + for ((propName, prop) <- properties) + prop.apply(p, prefix.map(_ + "." + propName).orElse(Some(propName))) + +} + +object Prop { + lazy val _seed: Int = { + val seedStr = System.getProperty("check.seed") + if (seedStr == null) + 1 + else if (seedStr == "random") + Random.nextInt() + else + seedStr.toInt + } + + def seed: Int = { + println(s"check: seed = ${_seed}") + _seed + } + + def check(prop: Prop): Unit = + prop.check() + + def forAll[T1](g1: Gen[Boolean]): Prop = + new GenProp1(g1, identity[Boolean]) + + def forAll[T1](g1: Gen[T1])(p: (T1) => Boolean): Prop = + new GenProp1(g1, p) + + def forAll[T1, T2](g1: Gen[T1], g2: Gen[T2])(p: (T1, T2) => Boolean): Prop = + new GenProp2(g1, g2, p) + + def forAll[T1, T2, T3](g1: Gen[T1], g2: Gen[T2], g3: Gen[T3])(p: (T1, T2, T3) => Boolean): Prop = + new GenProp3(g1, g2, g3, p) + + def forAll[T1](p: (T1) => Boolean)(implicit a1: Arbitrary[T1]): Prop = + new GenProp1(a1.arbitrary, p) + + def forAll[T1, T2](p: (T1, T2) => Boolean)(implicit a1: Arbitrary[T1], a2: Arbitrary[T2]): Prop = + new GenProp2(a1.arbitrary, a2.arbitrary, p) + + def forAll[T1, T2, T3]( + p: (T1, T2, T3) => Boolean + )(implicit + a1: Arbitrary[T1], + a2: Arbitrary[T2], + a3: Arbitrary[T3], + ): Prop = + new GenProp3(a1.arbitrary, a2.arbitrary, a3.arbitrary, p) + +} diff --git a/hail/hail/test/src/is/hail/io/IndexBTreeSuite.scala b/hail/hail/test/src/is/hail/io/IndexBTreeSuite.scala index 4682309ba0b..2a1690962c9 100644 --- a/hail/hail/test/src/is/hail/io/IndexBTreeSuite.scala +++ b/hail/hail/test/src/is/hail/io/IndexBTreeSuite.scala @@ -1,8 +1,6 @@ package is.hail.io import is.hail.HailSuite -import is.hail.check.Gen._ -import is.hail.check.Prop._ import is.hail.check.Properties import org.testng.annotations.Test diff --git a/hail/hail/test/src/is/hail/linalg/BlockMatrixSuite.scala 
b/hail/hail/test/src/is/hail/linalg/BlockMatrixSuite.scala index cb2a9008a5c..32fa75ba8eb 100644 --- a/hail/hail/test/src/is/hail/linalg/BlockMatrixSuite.scala +++ b/hail/hail/test/src/is/hail/linalg/BlockMatrixSuite.scala @@ -1,16 +1,15 @@ package is.hail.linalg import is.hail.{HailSuite, TestUtils} -import is.hail.check._ -import is.hail.check.Arbitrary._ -import is.hail.check.Gen._ -import is.hail.check.Prop._ import is.hail.expr.ir.{CompileAndEvaluate, GetField, TableCollect, TableLiteral} import is.hail.linalg.BlockMatrix.ops._ import is.hail.types.virtual.{TFloat64, TInt64, TStruct} import is.hail.utils._ - import breeze.linalg.{*, diag, DenseMatrix => BDM, DenseVector => BDV} +import is.hail.check.Arbitrary.arbitrary +import is.hail.check.Gen._ +import is.hail.check.Prop.forAll +import is.hail.check.{Arbitrary, Gen} import org.apache.spark.sql.Row import org.testng.annotations.Test diff --git a/hail/hail/test/src/is/hail/methods/ExprSuite.scala b/hail/hail/test/src/is/hail/methods/ExprSuite.scala index bc16f8b169b..1d8f10befb5 100644 --- a/hail/hail/test/src/is/hail/methods/ExprSuite.scala +++ b/hail/hail/test/src/is/hail/methods/ExprSuite.scala @@ -2,7 +2,6 @@ package is.hail.methods import is.hail.HailSuite import is.hail.backend.HailStateManager -import is.hail.check.Prop._ import is.hail.check.Properties import is.hail.expr._ import is.hail.expr.ir.IRParser diff --git a/hail/hail/test/src/is/hail/methods/LocalLDPruneSuite.scala b/hail/hail/test/src/is/hail/methods/LocalLDPruneSuite.scala index 4be2d10f0bf..63ce2cd71c1 100644 --- a/hail/hail/test/src/is/hail/methods/LocalLDPruneSuite.scala +++ b/hail/hail/test/src/is/hail/methods/LocalLDPruneSuite.scala @@ -3,7 +3,6 @@ package is.hail.methods import is.hail.{HailSuite, TestUtils} import is.hail.annotations.Annotation import is.hail.check.{Gen, Properties} -import is.hail.check.Prop._ import is.hail.expr.ir.{Interpret, MatrixValue, TableValue} import is.hail.utils._ import is.hail.variant._ diff --git 
a/hail/hail/test/src/is/hail/types/physical/GenInstances.scala b/hail/hail/test/src/is/hail/types/physical/GenInstances.scala new file mode 100644 index 00000000000..63399e65c13 --- /dev/null +++ b/hail/hail/test/src/is/hail/types/physical/GenInstances.scala @@ -0,0 +1,623 @@ +package is.hail.types.physical + +import is.hail.annotations.{Annotation, ExtendedOrdering} +import is.hail.backend.HailStateManager +import is.hail.check.Arbitrary.arbitrary +import is.hail.check.{Arbitrary, Gen} +import is.hail.types.virtual.{ + Field, TArray, TBoolean, TCall, TDict, TFloat32, TFloat64, TInt32, TInt64, TInterval, TLocus, + TSet, TString, TStruct, TTuple, Type, +} +import is.hail.utils.{Interval, genDNAString, triangle, uniqueMaxIndex} +import is.hail.variant.Call.{ + alleleByIndex, allelePair, alleles, isPhased, ploidy, unphasedDiploidGtIndex, +} +import is.hail.variant.Genotype.gqFromPL +import is.hail.variant.{AllelePair, Call, Call2, CallN, Locus, ReferenceGenome} +import org.apache.spark.sql.Row + +import scala.annotation.switch + +trait GenInstances { + + def genScalar(required: Boolean): Gen[PType] = + Gen.oneOf( + PBoolean(required), + PInt32(required), + PInt64(required), + PFloat32(required), + PFloat64(required), + PCanonicalString(required), + PCanonicalCall(required), + ) + + val genOptionalScalar: Gen[PType] = genScalar(false) + + val genRequiredScalar: Gen[PType] = genScalar(true) + + def genComplexType(required: Boolean): Gen[PType] = { + val rgDependents = ReferenceGenome.hailReferences.toArray.map(PCanonicalLocus(_, required)) + val others = Array(PCanonicalCall(required)) + Gen.oneOfSeq(rgDependents ++ others) + } + + def genFields(required: Boolean, genFieldType: Gen[PType]): Gen[Array[PField]] = + Gen.buildableOf[Array]( + Gen.zip(Gen.identifier, genFieldType) + ) + .filter(fields => fields.map(_._1).areDistinct()) + .map(fields => + fields + .iterator + .zipWithIndex + .map { case ((k, t), i) => PField(k, t, i) } + .toArray + ) + + def 
preGenStruct(required: Boolean, genFieldType: Gen[PType]): Gen[PStruct] = + for (fields <- genFields(required, genFieldType)) yield PCanonicalStruct(fields, required) + + def preGenTuple(required: Boolean, genFieldType: Gen[PType]): Gen[PTuple] = + for (fields <- genFields(required, genFieldType)) + yield PCanonicalTuple(required, fields.map(_.typ): _*) + + private val defaultRequiredGenRatio = 0.2 + + def genStruct: Gen[PStruct] = Gen.coin(defaultRequiredGenRatio).flatMap(preGenStruct(_, genArb)) + + val genOptionalStruct: Gen[PType] = preGenStruct(required = false, genArb) + + val genRequiredStruct: Gen[PType] = preGenStruct(required = true, genArb) + + val genInsertableStruct: Gen[PStruct] = Gen.coin(defaultRequiredGenRatio).flatMap(required => + if (required) + preGenStruct(required = true, genArb) + else + preGenStruct(required = false, genOptional) + ) + + def genSized(size: Int, required: Boolean, genPStruct: Gen[PStruct]): Gen[PType] = + if (size < 1) + Gen.const(PCanonicalStruct.empty(required)) + else if (size < 2) + genScalar(required) + else { + Gen.frequency( + (4, genScalar(required)), + (1, genComplexType(required)), + ( + 1, + genArb.map { + PCanonicalArray(_) + }, + ), + ( + 1, + genArb.map { + PCanonicalSet(_) + }, + ), + ( + 1, + genArb.map { + PCanonicalInterval(_) + }, + ), + (1, preGenTuple(required, genArb)), + (1, Gen.zip(genRequired, genArb).map { case (k, v) => PCanonicalDict(k, v) }), + (1, genPStruct.resize(size)), + ) + } + + def preGenArb(required: Boolean, genStruct: Gen[PStruct] = genStruct): Gen[PType] = + Gen.sized(genSized(_, required, genStruct)) + + def genArb: Gen[PType] = Gen.coin(0.2).flatMap(preGenArb(_)) + + val genOptional: Gen[PType] = preGenArb(required = false) + + val genRequired: Gen[PType] = preGenArb(required = true) + + val genInsertable: Gen[PStruct] = genInsertableStruct + + implicit def arbType: Arbitrary[PType] = Arbitrary(genArb) + + implicit val arbPArray: Arbitrary[PArray] = + Gen { + for { + elem <- 
arbitrary[PType] + required <- arbitrary[Boolean] + } yield PCanonicalArray(elem, required) + + def genNonmissingValue(sm: HailStateManager): Gen[IndexedSeq[Annotation]] = + Gen.buildableOf[Array](elementType.genValue(sm)).map(x => x: IndexedSeq[Annotation]) + } + + object Contig { + def gen(rg: ReferenceGenome): Gen[(String, Int)] = Gen.oneOfSeq(rg.lengths.toSeq) + } + + object Locus { + def gen(rg: ReferenceGenome): Gen[Locus] = + for { + (contig, length) <- Contig.gen(rg) + pos <- Gen.choose(1, length) + } yield Locus(contig, pos) + } + + object Call { + def check(c: Call, nAlleles: Int): Unit = { + (ploidy(c): @switch) match { + case 0 => + case 1 => + val a = alleleByIndex(c, 0) + assert(a >= 0 && a < nAlleles) + case 2 => + val nGenotypes = triangle(nAlleles) + val udtn = + if (isPhased(c)) { + val p = allelePair(c) + unphasedDiploidGtIndex(Call2(AllelePair.j(p), AllelePair.k(p))) + } else + unphasedDiploidGtIndex(c) + assert( + udtn < nGenotypes, + s"Invalid call found '${c.toString}' for number of alleles equal to '$nAlleles'.", + ) + case _ => + alleles(c).foreach(a => assert(a >= 0 && a < nAlleles)) + } + } + + def gen( + nAlleles: Int, + ploidyGen: Gen[Int] = Gen.choose(0, 2), + phasedGen: Gen[Boolean] = Gen.nextCoin(0.5), + ): Gen[Call] = for { + ploidy <- ploidyGen + phased <- phasedGen + alleles <- Gen.buildableOfN[Array](ploidy, Gen.choose(0, nAlleles - 1)) + } yield { + val c = CallN(alleles, phased) + check(c, nAlleles) + c + } + + def genUnphasedDiploid(nAlleles: Int): Gen[Call] = gen(nAlleles, Gen.const(2), Gen.const(false)) + + def genPhasedDiploid(nAlleles: Int): Gen[Call] = gen(nAlleles, Gen.const(2), Gen.const(true)) + + def genNonmissingValue: Gen[Call] = for { + nAlleles <- Gen.choose(2, 5) + c <- gen(nAlleles) + } yield { + check(c, nAlleles) + c + } + } + + object Genotype { + def genExtremeNonmissing(nAlleles: Int): Gen[Annotation] = { + val m = Int.MaxValue / (nAlleles + 1) + val nGenotypes = triangle(nAlleles) + val gg = for { + c: 
Option[Call] <- Gen.option(Call.genUnphasedDiploid(nAlleles)) + ad <- Gen.option(Gen.buildableOfN[Array](nAlleles, Gen.choose(0, m))) + dp <- Gen.option(Gen.choose(0, m)) + gq <- Gen.option(Gen.choose(0, 10000)) + pl <- Gen.oneOfGen( + Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, m))), + Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, 100))), + ) + } yield { + c.foreach(c => pl.foreach(pla => pla(Call.unphasedDiploidGtIndex(c)) = 0)) + pl.foreach { pla => + val m = pla.min + var i = 0 + while (i < pla.length) { + pla(i) -= m + i += 1 + } + } + val g = Annotation( + c.orNull, + ad.map(a => a: IndexedSeq[Int]).orNull, + dp.map(_ + ad.map(_.sum).getOrElse(0)).orNull, + gq.orNull, + pl.map(a => a: IndexedSeq[Int]).orNull, + ) + g + } + gg + } + + def genExtreme(nAlleles: Int): Gen[Annotation] = + Gen.frequency( + (100, genExtremeNonmissing(nAlleles)), + (1, Gen.const(null)), + ) + + def genRealisticNonmissing(nAlleles: Int): Gen[Annotation] = { + val nGenotypes = triangle(nAlleles) + val gg = for { + callRate <- Gen.choose(0d, 1d) + alleleFrequencies <- + Gen.buildableOfN[Array](nAlleles, Gen.choose(1e-6, 1d)) // avoid divison by 0 + .map { rawWeights => + val sum = rawWeights.sum + rawWeights.map(_ / sum) + } + c <- Gen.option( + Gen.zip( + Gen.chooseWithWeights(alleleFrequencies), + Gen.chooseWithWeights(alleleFrequencies), + ) + .map { case (gti, gtj) => Call2(gti, gtj) }, + callRate, + ) + ad <- Gen.option(Gen.buildableOfN[Array](nAlleles, Gen.choose(0, 50))) + dp <- Gen.choose(0, 30).map(d => ad.map(o => o.sum + d)) + pl <- Gen.option(Gen.buildableOfN[Array](nGenotypes, Gen.choose(0, 1000)).map { arr => + c match { + case Some(x) => + arr(Call.unphasedDiploidGtIndex(x)) = 0 + arr + case None => + val min = arr.min + arr.map(_ - min) + } + }) + gq <- Gen.choose(-30, 30).map(i => pl.map(pls => math.max(0, gqFromPL(pls) + i))) + } yield Annotation(c.orNull, ad.map(a => a: IndexedSeq[Int]).orNull, dp.orNull, gq.orNull, pl.map(a => a: 
IndexedSeq[Int]).orNull) + gg + } + + def genRealistic(nAlleles: Int): Gen[Annotation] = + Gen.frequency( + (100, genRealisticNonmissing(nAlleles)), + (1, Gen.const(null)), + ) + + def genGenericCallAndProbabilitiesGenotype(nAlleles: Int): Gen[Annotation] = { + val nGenotypes = triangle(nAlleles) + val gg = for (gp <- Gen.option(Gen.partition(nGenotypes, 32768))) yield { + val c = gp.flatMap(a => Option(uniqueMaxIndex(a))).map(Call2.fromUnphasedDiploidGtIndex(_)) + Row( + c.orNull, + gp.map(gpx => gpx.map(p => p.toDouble / 32768): IndexedSeq[Double]).orNull, + ) + } + Gen.frequency( + (100, gg), + (1, Gen.const(null)), + ) + } + } + + object VariantSubgen { + def random(rg: ReferenceGenome): VariantSubgen = VariantSubgen( + contigGen = Contig.gen(rg), + nAllelesGen = Gen.frequency((5, Gen.const(2)), (1, Gen.choose(2, 10))), + refGen = genDNAString, + altGen = Gen.frequency((10, genDNAString), (1, Gen.const("*"))), + ) + + def plinkCompatible(rg: ReferenceGenome): VariantSubgen = { + val r = random(rg) + val compatible = (1 until 22).map(_.toString).toSet + r.copy( + contigGen = r.contigGen.filter { case (contig, _) => + compatible.contains(contig) + } + ) + } + + def biallelic(rg: ReferenceGenome): VariantSubgen = random(rg).copy(nAllelesGen = Gen.const(2)) + + def plinkCompatibleBiallelic(rg: ReferenceGenome): VariantSubgen = + plinkCompatible(rg).copy(nAllelesGen = Gen.const(2)) + } + + case class VariantSubgen( + contigGen: Gen[(String, Int)], + nAllelesGen: Gen[Int], + refGen: Gen[String], + altGen: Gen[String], + ) { + + def genLocusAlleles: Gen[Annotation] = + for { + (contig, length) <- contigGen + start <- Gen.choose(1, length) + nAlleles <- nAllelesGen + ref <- refGen + altAlleles <- Gen.distinctBuildableOfN[Array]( + nAlleles - 1, + altGen, + ) + .filter(!_.contains(ref)) + } yield Annotation(Locus(contig, start), (ref +: altAlleles).toFastSeq) + } + + object ReferenceGenome { + def gen: Gen[ReferenceGenome] = + for { + name <- 
Gen.identifier.filter(!ReferenceGenome.hailReferences.contains(_)) + nContigs <- Gen.choose(3, 10) + contigs <- Gen.distinctBuildableOfN[Array](nContigs, Gen.identifier) + lengths <- Gen.buildableOfN[Array](nContigs, Gen.choose(1000000, 500000000)) + contigsIndex = contigs.zip(lengths).toMap + xContig <- Gen.oneOfSeq(contigs) + parXA <- Gen.choose(0, contigsIndex(xContig)) + parXB <- Gen.choose(0, contigsIndex(xContig)) + yContig <- Gen.oneOfSeq(contigs) if yContig != xContig + parYA <- Gen.choose(0, contigsIndex(yContig)) + parYB <- Gen.choose(0, contigsIndex(yContig)) + mtContig <- Gen.oneOfSeq(contigs) if mtContig != xContig && mtContig != yContig + } yield ReferenceGenome( + name, + contigs, + contigs.zip(lengths).toMap, + Set(xContig), + Set(yContig), + Set(mtContig), + Array( + (Locus(xContig, math.min(parXA, parXB)), Locus(xContig, math.max(parXA, parXB))), + (Locus(yContig, math.min(parYA, parYB)), Locus(yContig, math.max(parYA, parYB))), + ), + ) + } + + object Type { + def genScalar(): Gen[Type] = + Gen.oneOf(TBoolean, TInt32, TInt64, TFloat32, + TFloat64, TString, TCall) + + def genComplexType(): Gen[Type] = { + val rgDependents = ReferenceGenome.hailReferences.toArray.map(TLocus(_)) + val others = Array(TCall) + Gen.oneOfSeq(rgDependents ++ others) + } + + def genFields(genFieldType: Gen[Type]): Gen[Array[Field]] = { + Gen.buildableOf[Array]( + Gen.zip(Gen.identifier, genFieldType) + ) + .filter(fields => fields.map(_._1).areDistinct()) + .map(fields => + fields + .iterator + .zipWithIndex + .map { case ((k, t), i) => Field(k, t, i) } + .toArray + ) + } + + def preGenStruct(genFieldType: Gen[Type]): Gen[TStruct] = + for (fields <- genFields(genFieldType)) yield TStruct(fields) + + def preGenTuple(genFieldType: Gen[Type]): Gen[TTuple] = + for (fields <- genFields(genFieldType)) yield TTuple(fields.map(_.typ): _*) + + private val defaultRequiredGenRatio = 0.2 + + def genStruct: Gen[TStruct] = + Gen.coin(defaultRequiredGenRatio).flatMap(c => 
preGenStruct(genArb)) + + def genSized(size: Int, genTStruct: Gen[TStruct]): Gen[Type] = + if (size < 1) + Gen.const(TStruct.empty) + else if (size < 2) + genScalar() + else { + Gen.frequency( + (4, genScalar()), + (1, genComplexType()), + ( + 1, + genArb.map { + TArray(_) + }, + ), + ( + 1, + genArb.map { + TSet(_) + }, + ), + ( + 1, + genArb.map { + TInterval(_) + }, + ), + (1, preGenTuple(genArb)), + (1, Gen.zip(genRequired, genArb).map { case (k, v) => TDict(k, v) }), + (1, genTStruct.resize(size)), + ) + } + + def preGenArb(genStruct: Gen[TStruct] = genStruct): Gen[Type] = + Gen.sized(genSized(_, genStruct)) + + def genArb: Gen[Type] = preGenArb() + + val genOptional: Gen[Type] = preGenArb() + + val genRequired: Gen[Type] = preGenArb() + + def genWithValue(sm: HailStateManager): Gen[(Type, Annotation)] = for { + s <- Gen.size + // prefer smaller type and bigger values + fraction <- Gen.choose(0.1, 0.3) + x = (fraction * s).toInt + y = s - x + t <- Type.genStruct.resize(x) + v <- t.genValue(sm).resize(y) + } yield (t, v) + + implicit def arbType: Arbitrary[Type] = + Arbitrary(genArb) + } + + object PBaseStruct { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + if (types.isEmpty) { + Gen.const(Annotation.empty) + } else + Gen.uniformSequence(types.map(t => t.genValue(sm))).map(a => Annotation(a: _*)) + } + + object PDict { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Gen.buildableOf2[Map](Gen.zip(keyType.genValue(sm), valueType.genValue(sm))) + } + + object PSet { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Gen.buildableOf[Set](elementType.genValue(sm)) + } + + object TInterval { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Interval.gen(pointType.ordering(sm), pointType.genValue(sm)) + } + + trait Type { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] + + def genValue(sm: HailStateManager): Gen[Annotation] = + Gen.nextCoin(0.05).flatMap(isEmpty => + if 
(isEmpty) Gen.const(null) else genNonmissingValue(sm) + ) + } + + trait TRNGState { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? + } + + trait TUnion { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? + } + + trait TVariable { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? + } + + trait TVoid { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? + } + + trait TArray { + def genNonmissingValue(sm: HailStateManager): Gen[IndexedSeq[Annotation]] = + Gen.buildableOf[Array](elementType.genValue(sm)).map(x => x: IndexedSeq[Annotation]) + } + + trait TBaseStruct { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + if (types.isEmpty) { + Gen.const(Annotation.empty) + } else + Gen.size.flatMap(fuel => + if (types.length > fuel) + Gen.uniformSequence(types.map(t => Gen.const(null))).map(a => Annotation(a: _*)) + else + Gen.uniformSequence(types.map(t => t.genValue(sm))).map(a => Annotation(a: _*)) + ) + } + + trait TBinary { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Gen.buildableOf(arbitrary[Byte]) + } + + trait TBoolean { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + arbitrary[Boolean] + } + + trait TCall { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Call.genNonmissingValue + } + + trait Foo { + def genValue(sm: HailStateManager): Gen[Annotation] = + if (required) genNonmissingValue(sm) + else Gen.nextCoin(0.05).flatMap(isEmpty => + if (isEmpty) Gen.const(null) else genNonmissingValue(sm) + ) + + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + virtualType.genNonmissingValue(sm) + } + + trait TDict { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Gen.buildableOf2[Map](Gen.zip(keyType.genValue(sm), valueType.genValue(sm))) + } + + trait TFloat32 { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + arbitrary[Float] + } + + trait TFloat64 { + def 
genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + arbitrary[Double] + } + + trait TInt32 { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + arbitrary[Int] + } + + trait TInt64 { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + arbitrary[Long] + } + + trait TLocus { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Locus.gen(sm.referenceGenomes(rgName)) + } + + trait TNDArray { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = ??? + } + + trait TSet { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + Gen.buildableOf[Set](elementType.genValue(sm)) + } + + trait TStream { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = + throw new UnsupportedOperationException("Streams don't have associated annotations.") + } + + trait TString { + def genNonmissingValue(sm: HailStateManager): Gen[Annotation] = arbitrary[String] + } + + trait Interval { + def gen[P](pord: ExtendedOrdering, pgen: Gen[P]): Gen[Interval] = + Gen.zip(pgen, pgen, Gen.coin(), Gen.coin()) + .filter { case (x, y, s, e) => pord.compare(x, y) != 0 || (s && e) } + .map { case (x, y, s, e) => + if (pord.compare(x, y) < 0) + Interval(x, y, s, e) + else + Interval(y, x, s, e) + } + } + + object utils { + def genBase: Gen[Char] = Gen.oneOf('A', 'C', 'T', 'G') + + def genDNAString: Gen[String] = Gen.stringOf(genBase) + .resize(12) + .filter(s => !s.isEmpty) + } + +} diff --git a/hail/hail/test/src/is/hail/utils/BinaryHeapSuite.scala b/hail/hail/test/src/is/hail/utils/BinaryHeapSuite.scala index ea1f69313d8..ccb0796ce5d 100644 --- a/hail/hail/test/src/is/hail/utils/BinaryHeapSuite.scala +++ b/hail/hail/test/src/is/hail/utils/BinaryHeapSuite.scala @@ -1,8 +1,6 @@ package is.hail.utils -import is.hail.check.Arbitrary._ import is.hail.check.Gen -import is.hail.check.Prop._ import scala.collection.mutable diff --git a/hail/hail/test/src/is/hail/utils/BitVectorSuite.scala 
b/hail/hail/test/src/is/hail/utils/BitVectorSuite.scala index af287922264..d6902df2fcb 100644 --- a/hail/hail/test/src/is/hail/utils/BitVectorSuite.scala +++ b/hail/hail/test/src/is/hail/utils/BitVectorSuite.scala @@ -1,8 +1,5 @@ package is.hail.utils -import is.hail.check._ -import is.hail.check.Prop._ - import org.scalatestplus.testng.TestNGSuite import org.testng.annotations.Test diff --git a/hail/hail/test/src/is/hail/variant/GenotypeSuite.scala b/hail/hail/test/src/is/hail/variant/GenotypeSuite.scala index f88eff4b872..a2a057b30fc 100644 --- a/hail/hail/test/src/is/hail/variant/GenotypeSuite.scala +++ b/hail/hail/test/src/is/hail/variant/GenotypeSuite.scala @@ -2,7 +2,6 @@ package is.hail.variant import is.hail.TestUtils import is.hail.check.Gen -import is.hail.check.Prop._ import is.hail.testUtils.Variant import is.hail.utils._ diff --git a/hail/hail/test/src/is/hail/variant/ReferenceGenomeSuite.scala b/hail/hail/test/src/is/hail/variant/ReferenceGenomeSuite.scala index 693a56c6061..05422e43c51 100644 --- a/hail/hail/test/src/is/hail/variant/ReferenceGenomeSuite.scala +++ b/hail/hail/test/src/is/hail/variant/ReferenceGenomeSuite.scala @@ -2,7 +2,6 @@ package is.hail.variant import is.hail.{HailSuite, TestUtils} import is.hail.backend.{ExecuteContext, HailStateManager} -import is.hail.check.Prop._ import is.hail.check.Properties import is.hail.expr.ir.EmitFunctionBuilder import is.hail.io.reference.{FASTAReader, FASTAReaderConfig, LiftOver}