Skip to content

Commit 83e4cc7

Browse files
author
Abby Whittier
committed
feat: add Mill build system support with Thrift and Python API integration
Add Mill build configuration alongside existing sbt build:
- Mill package.mill files for all modules (api, aggregator, online, spark, flink, service)
- Thrift code generation for Java (api module)
- Python Thrift generation and wheel building commands
- CircleCI jobs for Mill compilation, tests, and Python API builds
- prepareScalaSources command to handle scala-2.12 version-specific sources

Mill commands:
./mill api.prepareScalaSources # Copy scala-2.12 sources (one-time setup)
./mill <module>.compile # Compile modules
./mill <module>.test # Run tests
./mill generatePythonThrift # Generate Python Thrift files
./mill buildPythonApi # Build Python wheel

Test fixes:
- Migrate test modules from ScalaTest to JUnit4 (tests use JUnit annotations)
- Fix Mockito verification in FetcherCacheTest
- Fix MetadataExporterTest to use filesystem path instead of JAR resource
- Update Python build script to use venv

Both Mill and sbt build systems work simultaneously.
1 parent cb0ceed commit 83e4cc7

File tree

12 files changed

+1170
-4
lines changed

12 files changed

+1170
-4
lines changed

.circleci/config.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,76 @@ jobs:
195195
destination: spark_warehouse.tar.gz
196196
when: on_fail
197197

198+
"Mill -- Compile All":
  # Compiles the api, aggregator, online, spark and flink modules with Mill.
  executor: docker_baseimg_executor
  steps:
    - checkout
    - run:
        name: Install dependencies for Mill native binary
        command: |
          # The Mill native binary requires GLIBC 2.32+: install the build
          # toolchain and pull in the newest libc6 the image's repos offer.
          apt-get update
          apt-get install -y build-essential libc6
    - run:
        name: Compile all modules with Mill
        shell: /bin/bash -leuxo pipefail
        command: |
          conda activate chronon_py
          # Copy the scala-2.12 specific sources into place before compiling
          ./mill api.prepareScalaSources
          # The ./mill wrapper downloads the matching Mill version on first use
          ./mill api.compile aggregator.compile online.compile spark.compile flink.compile
217+
218+
"Mill -- Tests":
  # Runs the api, aggregator, online, spark and flink test suites with Mill.
  executor: docker_baseimg_executor_xxlarge
  steps:
    - checkout
    - run:
        name: Install dependencies for Mill native binary
        command: |
          # The Mill native binary requires GLIBC 2.32+: install the build
          # toolchain and pull in the newest libc6 the image's repos offer.
          apt-get update
          apt-get install -y build-essential libc6
    - run:
        name: Run all tests with Mill
        shell: /bin/bash -leuxo pipefail
        command: |
          conda activate chronon_py
          ./mill api.prepareScalaSources
          ./mill api.test aggregator.test online.test spark.test flink.test
    # On failure only: archive the Spark warehouse and keep it as an artifact.
    - run:
        name: Compress spark-warehouse
        command: |
          cd /tmp/ && tar -czvf spark-warehouse.tar.gz chronon/spark-warehouse
        when: on_fail
    - store_artifacts:
        path: /tmp/spark-warehouse.tar.gz
        destination: spark_warehouse.tar.gz
        when: on_fail
244+
245+
"Mill -- Python API Build":
  # Generates the Python Thrift bindings and builds the Python API with Mill.
  executor: docker_baseimg_executor
  steps:
    - checkout
    - run:
        name: Install dependencies for Mill native binary
        command: |
          # The Mill native binary requires GLIBC 2.32+: install the build
          # toolchain and pull in the newest libc6 the image's repos offer.
          apt-get update
          apt-get install -y build-essential libc6
    - run:
        name: Build Python API with Mill
        shell: /bin/bash -leuxo pipefail
        command: |
          conda activate chronon_py
          # Project root used by the Mill commands below
          export CHRONON_ROOT=/chronon
          # The ./mill wrapper downloads the matching Mill version on first use
          ./mill generatePythonThrift
          ./mill buildPythonApi
    - store_artifacts:
        path: /chronon/api/py/dist
267+
198268
workflows:
199269
build_test_deploy:
200270
jobs:
@@ -221,5 +291,14 @@ workflows:
221291
requires:
222292
- "Pull Docker Image"
223293
- "Scala 13 -- Iceberg Table Utils Tests":
294+
requires:
295+
- "Pull Docker Image"
296+
- "Mill -- Compile All":
297+
requires:
298+
- "Pull Docker Image"
299+
- "Mill -- Tests":
300+
requires:
301+
- "Pull Docker Image"
302+
- "Mill -- Python API Build":
224303
requires:
225304
- "Pull Docker Image"

aggregator/package.mill

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package build.aggregator
2+
3+
import mill._
4+
import mill.javalib._
5+
import mill.javalib.publish._
6+
import mill.scalalib.SbtModule
7+
8+
/**
 * Mill build definition for the `aggregator` module.
 *
 * Uses the sbt directory layout (`SbtModule`), depends on the `api` module and is
 * publishable (`PublishModule`).
 */
object `package` extends SbtModule with PublishModule {

  def scalaVersion = "2.12.12"

  def mvnDeps = Seq(
    mvn"com.google.code.gson:gson:2.8.6",
    mvn"com.yahoo.datasketches:sketches-core:0.13.4",
    mvn"org.apache.commons:commons-lang3:3.12.0"
  )

  def moduleDeps = super.moduleDeps ++ Seq(build.api)

  // Spark is needed to compile but is declared compile-only, so it is not a
  // runtime dependency of this module.
  def compileMvnDeps = Seq(
    mvn"org.apache.spark::spark-core:3.1.1",
    mvn"org.apache.spark::spark-sql:3.1.1"
  )

  // Named arguments keep the bare booleans and `None`s self-describing.
  def pomSettings = PomSettings(
    description = "Chronon is a feature engineering platform",
    organization = "ai.chronon",
    url = "https://github.com/airbnb/chronon",
    licenses = Seq(
      License(
        id = "Apache 2",
        name = "Apache 2",
        url = "http://www.apache.org/licenses/LICENSE-2.0.txt",
        isOsiApproved = false,
        isFsfLibre = false,
        distribution = "repo"
      )
    ),
    versionControl = VersionControl(
      browsableRepository = Some("https://github.com/airbnb/chronon"),
      connection = Some("scm:git@github.com:airbnb/chronon.git"),
      developerConnection = None,
      tag = None
    ),
    developers = Seq(
      Developer(
        id = "nikhilsimha",
        name = "Nikhil Simha",
        url = "http://nikhilsimha.com",
        organization = None,
        organizationUrl = None
      )
    )
  )

  // NOTE(review): looks like a personal development version string - confirm
  // before publishing from this build.
  def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"

  /** JUnit4 test module; also reuses the test classes of the `api` module. */
  object test extends SbtTests with TestModule.Junit4 {

    def mvnDeps = Seq(
      mvn"junit:junit:4.13.2",
      mvn"com.novocode:junit-interface:0.11",
      mvn"org.scalatest::scalatest:3.2.15",
      mvn"org.apache.commons:commons-math3:3.6.1"
    )

    def moduleDeps = super.moduleDeps ++ Seq(build.api.test)

    // Sandbox working directory and test parallelism are both switched off;
    // presumably the tests rely on workspace-relative paths and shared state
    // - TODO confirm.
    def testSandboxWorkingDir = false
    def testParallelism = false

  }
}

api/package.mill

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package build.api
2+
3+
import mill._
4+
import mill.javalib._
5+
import mill.javalib.publish._
6+
import mill.scalalib.SbtModule
7+
8+
/**
 * Mill build definition for the `api` module.
 *
 * Uses the sbt directory layout (`SbtModule`), generates Java sources from
 * `thrift/api.thrift` and is publishable (`PublishModule`).
 */
object `package` extends SbtModule with PublishModule {

  def scalaVersion = "2.12.12"

  def mvnDeps = Seq(
    mvn"org.apache.thrift:libthrift:0.13.0",
    mvn"org.scala-lang.modules::scala-collection-compat:2.6.0",
    mvn"org.scala-lang:scala-reflect:2.12.12"
  )

  // Spark is needed to compile but is declared compile-only, so it is not a
  // runtime dependency of this module.
  def compileMvnDeps = Seq(
    mvn"org.apache.spark::spark-core:3.1.1",
    mvn"org.apache.spark::spark-sql:3.1.1"
  )

  // Named arguments keep the bare booleans and `None`s self-describing.
  def pomSettings = PomSettings(
    description = "Chronon is a feature engineering platform",
    organization = "ai.chronon",
    url = "https://github.com/airbnb/chronon",
    licenses = Seq(
      License(
        id = "Apache 2",
        name = "Apache 2",
        url = "http://www.apache.org/licenses/LICENSE-2.0.txt",
        isOsiApproved = false,
        isFsfLibre = false,
        distribution = "repo"
      )
    ),
    versionControl = VersionControl(
      browsableRepository = Some("https://github.com/airbnb/chronon"),
      connection = Some("scm:git@github.com:airbnb/chronon.git"),
      developerConnection = None,
      tag = None
    ),
    developers = Seq(
      Developer(
        id = "nikhilsimha",
        name = "Nikhil Simha",
        url = "http://nikhilsimha.com",
        organization = None,
        organizationUrl = None
      )
    )
  )

  // NOTE(review): looks like a personal development version string - confirm
  // before publishing from this build.
  def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"

  /**
   * Prepare Scala version-specific sources for Mill compilation.
   *
   * Copies every `.scala` file under `src/main/scala-2.12` to the same relative
   * path under `src/main/scala`, overwriting existing files. Does nothing (apart
   * from printing a notice) when the `scala-2.12` directory is absent.
   *
   * Run it once per clean checkout, before compiling:
   *   ./mill api.prepareScalaSources
   *
   * SBT users don't need this - sbt automatically picks up scala-2.12/ directories.
   *
   * NOTE(review): the copies land inside the source tree, so an sbt build run
   * afterwards may see the same definitions under both `scala/` and
   * `scala-2.12/` - confirm the copies are ignored or cleaned up.
   */
  def prepareScalaSources() = Task.Command {
    val versionSpecificDir = millSourcePath0 / "src" / "main" / "scala-2.12"
    val sharedDir = millSourcePath0 / "src" / "main" / "scala"

    if (os.isDir(versionSpecificDir)) {
      os.walk(versionSpecificDir)
        .filter(_.ext == "scala")
        .foreach { sourceFile =>
          val targetFile = sharedDir / sourceFile.relativeTo(versionSpecificDir)
          // createFolders = true already creates any missing parent directories.
          os.copy.over(sourceFile, targetFile, createFolders = true)
          println(s"✓ Copied: ${sourceFile.last}")
        }
      println("✓ Scala 2.12 sources prepared for Mill compilation")
    } else {
      // os.walk throws on a missing directory; treat that case as nothing to copy.
      println(s"No scala-2.12 sources found at $versionSpecificDir; nothing to copy")
    }
    ()
  }

  /**
   * The Thrift IDL, declared as a source so that edits to it invalidate the
   * cached result of `generatedSources`.
   */
  def thriftFile = Task.Source(millSourcePath0 / "thrift" / "api.thrift")

  /**
   * Generates Java sources from the Thrift IDL by invoking the `thrift`
   * executable, which must be available on the PATH. Returns one `PathRef` per
   * generated `.java` file.
   */
  def generatedSources = Task {
    val outDir = Task.dest / "java"
    // Start from an empty directory so stale generated files never survive.
    os.remove.all(outDir)
    os.makeDir.all(outDir)
    os.proc("thrift", "--gen", "java", "-out", outDir, thriftFile().path)
      .call(stdout = os.Inherit)

    os.walk(outDir).filter(_.ext == "java").map(PathRef(_))
  }

  /** JUnit4 test module for the `api` sources. */
  object test extends SbtTests with TestModule.Junit4 {

    def mvnDeps = Seq(
      mvn"com.novocode:junit-interface:0.11",
      mvn"junit:junit:4.13.2",
      mvn"org.scalatest::scalatest:3.2.15",
      mvn"org.scalatestplus::mockito-3-4:3.2.10.0"
    )

    // Sandbox working directory and test parallelism are both switched off;
    // presumably the tests rely on workspace-relative paths and shared state
    // - TODO confirm.
    def testSandboxWorkingDir = false
    def testParallelism = false

  }
}

api/py/python-api-build.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,23 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
2626
echo "Removing old distributions..."
2727
rm -f $SCRIPT_DIR/dist/*
2828

29+
# Build inside a virtualenv at the repository root (two levels above this
# script); create it on first use.
VENV_DIR="$SCRIPT_DIR/../../venv"
if [[ ! -d "$VENV_DIR" ]]; then
    echo "Creating Python virtual environment..."
    python3 -m venv "$VENV_DIR"
fi

# Activate venv
echo "Activating Python virtual environment..."
source "$VENV_DIR/bin/activate"

# Install the build dependencies unless both are already importable. Probing
# only `build` would leave a venv that has `build` but lacks `twine` unrepaired.
if ! python3 -c "import build, twine" 2>/dev/null; then
    echo "Installing Python build dependencies..."
    # `python3 -m pip` installs into the same interpreter that was just probed.
    python3 -m pip install --quiet build twine
fi
45+
2946
# The default action is "build"
3047
if [[ -z "${ACTION}" ]] || [[ "${ACTION}" == "build" ]]; then
3148
PYPI_REPOSITORY="internal"

0 commit comments

Comments
 (0)