Remove Lazy for Scala 2.13

cchantep committed Mar 12, 2023
1 parent a27b04b · commit daba205
Showing 11 changed files with 634 additions and 285 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
-        scala: [2.13.10, 2.12.16]
+        scala: [2.13.10, 2.12.17]
java: [temurin@8]
project: [root-spark31, root-spark32, root-spark33]
exclude:
@@ -104,7 +104,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
-        scala: [2.12.16]
+        scala: [2.12.17]
java: [temurin@8]
runs-on: ${{ matrix.os }}
steps:
@@ -160,7 +160,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
-        scala: [2.12.16]
+        scala: [2.12.17]
java: [temurin@8]
runs-on: ${{ matrix.os }}
steps:
39 changes: 34 additions & 5 deletions build.sbt
@@ -11,11 +11,10 @@ val scalacheck = "1.17.0"
val scalacheckEffect = "1.0.4"
val refinedVersion = "0.10.2"

val Scala212 = "2.12.16"
val Scala212 = "2.12.17"
val Scala213 = "2.13.10"

ThisBuild / tlBaseVersion := "0.13"

ThisBuild / crossScalaVersions := Seq(Scala213, Scala212)
ThisBuild / scalaVersion := Scala212
ThisBuild / tlSkipIrrelevantScalas := true
@@ -70,19 +69,21 @@ lazy val dataset = project
.dependsOn(core % "test->test;compile->compile")

lazy val `dataset-spark32` = project
.settings(name := "frameless-dataset-spark32")
.settings(name := ".frameless-dataset-spark32")
.settings(sourceDirectory := (dataset / sourceDirectory).value)
.settings(datasetSettings)
.settings(sparkDependencies(spark32Version))
.settings(spark32Settings)
.settings(mimaPreviousArtifacts := Set.empty)
.dependsOn(core % "test->test;compile->compile")

lazy val `dataset-spark31` = project
.settings(name := "frameless-dataset-spark31")
.settings(name := ".frameless-dataset-spark31")
.settings(sourceDirectory := (dataset / sourceDirectory).value)
.settings(datasetSettings)
.settings(sparkDependencies(spark31Version))
.settings(spark31Settings)
.settings(mimaPreviousArtifacts := Set.empty)
.dependsOn(core % "test->test;compile->compile")

lazy val refined = project
@@ -144,6 +145,7 @@ lazy val docs = project
.settings(sparkMlDependencies(sparkVersion, Compile))
.settings(
addCompilerPlugin("org.typelevel" % "kind-projector" % "0.13.2" cross CrossVersion.full),
libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always,
scalacOptions += "-Ydelambdafy:inline"
)
.dependsOn(dataset, cats, ml)
@@ -185,7 +187,18 @@ lazy val datasetSettings = framelessSettings ++ framelessTypedDatasetREPL ++ Seq
imt("frameless.RecordEncoderFields.deriveRecordLast"),
mc("frameless.functions.FramelessLit"),
mc(f"frameless.functions.FramelessLit$$"),
dmm("frameless.functions.package.litAggr")
dmm("frameless.functions.package.litAggr"),
imt("frameless.RecordEncoder.this"),
imt("frameless.TypedEncoder.usingDerivation"),
imt("frameless.TypedEncoder.collectionEncoder"),
imt("frameless.TypedEncoder.collectionEncoder"),
imt("frameless.TypedEncoder.usingDerivation"),
imt("frameless.ops.As.equivGeneric"),
imt("frameless.ops.As.equivHList"),
imt("frameless.ops.As.equivHList"),
imt("frameless.ops.As.equivGeneric"),
dmm("frameless.ops.LowPriorityAs.equivHList"),
dmm("frameless.ops.LowPriorityAs.equivGeneric")
)
}
)
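Aside: imt, mc and dmm are shorthands for MiMa problem filters defined elsewhere in build.sbt, outside this hunk. A hedged sketch of the usual aliases, assuming the standard sbt-mima-plugin API:

import com.typesafe.tools.mima.core._

// Assumed aliases, not shown in this diff; each turns a fully-qualified
// member name into a MiMa exclusion of the given problem kind.
val imt = ProblemFilters.exclude[IncompatibleMethTypeProblem](_)
val mc  = ProblemFilters.exclude[MissingClassProblem](_)
val dmm = ProblemFilters.exclude[DirectMissingMethodProblem](_)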
@@ -251,9 +264,25 @@ lazy val scalacOptionSettings = Def.setting {
baseScalacOptions(scalaVersion.value)
}

// Maps the Scala version to the extra unmanaged source directory:
// scala-2.13- for Scala < 2.13, scala-2.13+ for 2.13 and later.
def unmanaged(ver: String, base: File): Seq[File] =
CrossVersion.partialVersion(ver) match {
case Some((2, n)) if n < 13 =>
Seq(base / "scala-2.13-")

case _ =>
Seq(base / "scala-2.13+")

}

lazy val framelessSettings = Seq(
scalacOptions ++= scalacOptionSettings.value,
Test / testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-oDF"),
Compile / unmanagedSourceDirectories ++= {
unmanaged(scalaVersion.value, (Compile / sourceDirectory).value)
},
Test / unmanagedSourceDirectories ++= {
unmanaged(scalaVersion.value, (Test / sourceDirectory).value)
},
libraryDependencies ++= Seq(
"com.chuusai" %% "shapeless" % shapeless,
"org.scalatest" %% "scalatest" % scalatest % Test,
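Note: the scala-2.13+/scala-2.13- source split wired up above is what lets this commit drop shapeless.Lazy on Scala 2.13 while keeping it for 2.12. A minimal sketch of the two variants, with hypothetical trait names and simplified signatures (the real ones are in the version-specific files below):

import shapeless._
import frameless.{ RecordEncoderFields, TypedEncoder }

// scala-2.13- variant (assumed): Lazy defers the recursive implicit
// lookup that record derivation needs on Scala 2.12.
trait DerivationScala212 {
  implicit def usingDerivation[F, G <: HList](
      implicit
      gen: LabelledGeneric.Aux[F, G],
      fields: Lazy[RecordEncoderFields[G]]
    ): TypedEncoder[F] = ???
}

// scala-2.13+ variant (as in this commit): the instance is taken
// directly, so the Lazy wrapper disappears.
trait DerivationScala213 {
  implicit def usingDerivation[F, G <: HList](
      implicit
      gen: LabelledGeneric.Aux[F, G],
      fields: RecordEncoderFields[G]
    ): TypedEncoder[F] = ???
}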
75 changes: 75 additions & 0 deletions dataset/src/main/scala-2.13+/frameless/RecordEncoder.scala
@@ -0,0 +1,75 @@
package frameless

import org.apache.spark.sql.FramelessInternals

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects.{ Invoke, NewInstance }
import org.apache.spark.sql.types._

import shapeless._
import shapeless.ops.hlist.IsHCons

import scala.reflect.ClassTag

class RecordEncoder[F, G <: HList, H <: HList]
(implicit
i0: LabelledGeneric.Aux[F, G],
i1: DropUnitValues.Aux[G, H],
i2: IsHCons[H],
fields: RecordEncoderFields[H],
newInstanceExprs: NewInstanceExprs[G],
classTag: ClassTag[F]
) extends TypedEncoder[F] {
def nullable: Boolean = false

def jvmRepr: DataType = FramelessInternals.objectTypeFor[F]

def catalystRepr: DataType = {
val structFields = fields.value.map { field =>
StructField(
name = field.name,
dataType = field.encoder.catalystRepr,
nullable = field.encoder.nullable,
metadata = Metadata.empty
)
}

StructType(structFields)
}

def toCatalyst(path: Expression): Expression = {
val nameExprs = fields.value.map { field =>
Literal(field.name)
}

val valueExprs = fields.value.map { field =>
val fieldPath = Invoke(path, field.name, field.encoder.jvmRepr, Nil)
field.encoder.toCatalyst(fieldPath)
}

// CreateNamedStruct expects its children as a flat list of alternating name/value expressions
val exprs = nameExprs.zip(valueExprs).flatMap {
case (nameExpr, valueExpr) => nameExpr :: valueExpr :: Nil
}

val createExpr = CreateNamedStruct(exprs)
val nullExpr = Literal.create(null, createExpr.dataType)

If(IsNull(path), nullExpr, createExpr)
}

def fromCatalyst(path: Expression): Expression = {
val exprs = fields.value.map { field =>
field.encoder.fromCatalyst(
GetStructField(path, field.ordinal, Some(field.name)))
}

val newArgs = newInstanceExprs.from(exprs)
val newExpr = NewInstance(
classTag.runtimeClass, newArgs, jvmRepr, propagateNull = true)

val nullExpr = Literal.create(null, jvmRepr)

If(IsNull(path), nullExpr, newExpr)
}
}
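For reference, a hedged usage sketch of the encoder above; Person is a hypothetical example type, not part of this commit:

import frameless.TypedEncoder

final case class Person(name: String, age: Int)

// On Scala 2.13 this resolves through usingDerivation -> RecordEncoder
// with no shapeless.Lazy in the implicit chain. catalystRepr is roughly
//   StructType(StructField("name", StringType, false),
//              StructField("age", IntegerType, false)).
val personEncoder: TypedEncoder[Person] = implicitly[TypedEncoder[Person]]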
196 changes: 196 additions & 0 deletions dataset/src/main/scala-2.13+/frameless/TypedEncoderCompat.scala
@@ -0,0 +1,196 @@
package frameless

import scala.reflect.ClassTag

import org.apache.spark.sql.types._

import org.apache.spark.sql.FramelessInternals

import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects._
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}

import shapeless._
import shapeless.ops.hlist.IsHCons

private[frameless] trait TypedEncoderCompat {
/** Encodes things using injection if there is one defined */
implicit def usingInjection[A: ClassTag, B]
(implicit inj: Injection[A, B], trb: TypedEncoder[B]): TypedEncoder[A] =
new TypedEncoder[A] {
def nullable: Boolean = trb.nullable
def jvmRepr: DataType = FramelessInternals.objectTypeFor[A](classTag)
def catalystRepr: DataType = trb.catalystRepr

def fromCatalyst(path: Expression): Expression = {
val bexpr = trb.fromCatalyst(path)
Invoke(Literal.fromObject(inj), "invert", jvmRepr, Seq(bexpr))
}

def toCatalyst(path: Expression): Expression =
trb.toCatalyst(Invoke(
Literal.fromObject(inj), "apply", trb.jvmRepr, Seq(path)))
}

/** Encodes things as records if there is no Injection defined */
implicit def usingDerivation[F, G <: HList, H <: HList]
(implicit
i0: LabelledGeneric.Aux[F, G],
i1: DropUnitValues.Aux[G, H],
i2: IsHCons[H],
i3: RecordEncoderFields[H],
i4: NewInstanceExprs[G],
i5: ClassTag[F]
): TypedEncoder[F] = new RecordEncoder[F, G, H]

implicit def arrayEncoder[T: ClassTag](
implicit i0: RecordFieldEncoder[T]): TypedEncoder[Array[T]] =
new TypedEncoder[Array[T]] {
private lazy val encodeT = i0.encoder

def nullable: Boolean = false

lazy val jvmRepr: DataType = i0.jvmRepr match {
case ByteType => BinaryType
case _ => FramelessInternals.objectTypeFor[Array[T]]
}

lazy val catalystRepr: DataType = i0.jvmRepr match {
case ByteType => BinaryType
case _ => ArrayType(encodeT.catalystRepr, encodeT.nullable)
}

def toCatalyst(path: Expression): Expression =
i0.jvmRepr match {
case IntegerType | LongType | DoubleType | FloatType |
ShortType | BooleanType =>
StaticInvoke(
classOf[UnsafeArrayData],
catalystRepr, "fromPrimitiveArray", path :: Nil)

case ByteType => path

case _ => MapObjects(
i0.toCatalyst, path, i0.jvmRepr, encodeT.nullable)
}

def fromCatalyst(path: Expression): Expression =
encodeT.jvmRepr match {
case IntegerType => Invoke(path, "toIntArray", jvmRepr)
case LongType => Invoke(path, "toLongArray", jvmRepr)
case DoubleType => Invoke(path, "toDoubleArray", jvmRepr)
case FloatType => Invoke(path, "toFloatArray", jvmRepr)
case ShortType => Invoke(path, "toShortArray", jvmRepr)
case BooleanType => Invoke(path, "toBooleanArray", jvmRepr)

case ByteType => path

case _ =>
Invoke(MapObjects(
i0.fromCatalyst, path,
encodeT.catalystRepr, encodeT.nullable), "array", jvmRepr)
}

override def toString: String = s"arrayEncoder($jvmRepr)"
}

implicit def collectionEncoder[C[X] <: Seq[X], T]
(implicit
i0: RecordFieldEncoder[T],
i1: ClassTag[C[T]]): TypedEncoder[C[T]] = new TypedEncoder[C[T]] {
private lazy val encodeT = i0.encoder

def nullable: Boolean = false

def jvmRepr: DataType = FramelessInternals.objectTypeFor[C[T]](i1)

def catalystRepr: DataType =
ArrayType(encodeT.catalystRepr, encodeT.nullable)

def toCatalyst(path: Expression): Expression = {
if (ScalaReflection.isNativeType(i0.jvmRepr)) {
NewInstance(classOf[GenericArrayData], path :: Nil, catalystRepr)
} else {
MapObjects(i0.toCatalyst, path, i0.jvmRepr, encodeT.nullable)
}
}

def fromCatalyst(path: Expression): Expression =
MapObjects(
i0.fromCatalyst,
path,
encodeT.catalystRepr,
encodeT.nullable,
Some(i1.runtimeClass) // This will cause MapObjects to build a collection of type C[_] directly
)

override def toString: String = s"collectionEncoder($jvmRepr)"
}

/**
* @tparam A the key type
* @tparam B the value type
* @param i0 the keys encoder
* @param i1 the values encoder
*/
implicit def mapEncoder[A: NotCatalystNullable, B]
(implicit
i0: RecordFieldEncoder[A],
i1: RecordFieldEncoder[B],
): TypedEncoder[Map[A, B]] = new TypedEncoder[Map[A, B]] {
def nullable: Boolean = false

def jvmRepr: DataType = FramelessInternals.objectTypeFor[Map[A, B]]

private lazy val encodeA = i0.encoder
private lazy val encodeB = i1.encoder

lazy val catalystRepr: DataType = MapType(
encodeA.catalystRepr, encodeB.catalystRepr, encodeB.nullable)

def fromCatalyst(path: Expression): Expression = {
val keyArrayType = ArrayType(encodeA.catalystRepr, containsNull = false)

val keyData = Invoke(
MapObjects(
i0.fromCatalyst,
Invoke(path, "keyArray", keyArrayType),
encodeA.catalystRepr
),
"array",
FramelessInternals.objectTypeFor[Array[Any]]
)

val valueArrayType = ArrayType(encodeB.catalystRepr, encodeB.nullable)

val valueData = Invoke(
MapObjects(
i1.fromCatalyst,
Invoke(path, "valueArray", valueArrayType),
encodeB.catalystRepr
),
"array",
FramelessInternals.objectTypeFor[Array[Any]]
)

StaticInvoke(
ArrayBasedMapData.getClass,
jvmRepr,
"toScalaMap",
keyData :: valueData :: Nil)
}

def toCatalyst(path: Expression): Expression =
ExternalMapToCatalyst(
path,
i0.jvmRepr,
i0.toCatalyst,
false,
i1.jvmRepr,
i1.toCatalyst,
encodeB.nullable)

override def toString = s"mapEncoder($jvmRepr)"
}
}
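Finally, a hedged sketch of how the collection and map encoders above compose; Order is a hypothetical example type, not part of this commit:

import frameless.TypedEncoder

final case class Order(tags: Vector[String], prices: Map[String, Double])

// tags resolves via collectionEncoder, prices via mapEncoder (String is
// assumed to have a NotCatalystNullable instance). The Catalyst schema
// is roughly
//   tags   -> ArrayType(StringType, containsNull = false)
//   prices -> MapType(StringType, DoubleType, valueContainsNull = false)
val orderEncoder: TypedEncoder[Order] = implicitly[TypedEncoder[Order]]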