Skip to content

Commit 9233bf8

Browse files
author
Abby Whittier
committed
feat: add Mill build system support with Thrift and Python API integration
Add Mill build configuration alongside existing sbt build: - Mill package.mill files for all modules (api, aggregator, online, spark, flink, service) - Thrift code generation for Java (api module) - Python Thrift generation and wheel building commands - CircleCI jobs for Mill compilation, tests, and Python API builds - prepareScalaSources command to handle scala-2.12 version-specific sources Mill commands: ./mill api.prepareScalaSources # Copy scala-2.12 sources (one-time setup) ./mill <module>.compile # Compile modules ./mill <module>.test # Run tests ./mill generatePythonThrift # Generate Python Thrift files ./mill buildPythonApi # Build Python wheel Test fixes: - Migrate test modules from ScalaTest to JUnit4 (tests use JUnit annotations) - Fix Mockito verification in FetcherCacheTest - Fix MetadataExporterTest to use filesystem path instead of JAR resource - Update Python build script to use venv Both Mill and sbt build systems work simultaneously.
1 parent cb0ceed commit 9233bf8

File tree

12 files changed

+1236
-4
lines changed

12 files changed

+1236
-4
lines changed

.circleci/config.yml

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,84 @@ executors:
1515
working_directory: /chronon
1616
docker:
1717
- image: houpy0829/chronon-ci:base--f87f50dc520f7a73894ae024eb78bd305d5b08e2
18+
modern_ubuntu_executor:
19+
resource_class: xlarge
20+
working_directory: /chronon
21+
docker:
22+
- image: cimg/openjdk:11.0
23+
modern_ubuntu_executor_xxlarge:
24+
resource_class: 2xlarge
25+
working_directory: /chronon
26+
docker:
27+
- image: cimg/openjdk:11.0
28+
29+
commands:
30+
install_build_dependencies:
31+
description: "Install Thrift, Conda, SBT, and Mill on Ubuntu 22.04"
32+
steps:
33+
- run:
34+
name: Install system dependencies
35+
command: |
36+
sudo apt-get update
37+
sudo apt-get install -y \
38+
automake \
39+
bison \
40+
cmake \
41+
flex \
42+
g++ \
43+
git \
44+
libboost-dev \
45+
libboost-filesystem-dev \
46+
libboost-program-options-dev \
47+
libboost-system-dev \
48+
libboost-test-dev \
49+
libevent-dev \
50+
libssl-dev \
51+
libtool \
52+
make \
53+
pkg-config
54+
- run:
55+
name: Install Thrift 0.11.0 from source
56+
command: |
57+
export THRIFT_VERSION=0.11.0
58+
# Fetch over HTTPS and fail on HTTP error responses (-f) instead of saving an error page as the tarball.
curl -fsSL "https://archive.apache.org/dist/thrift/$THRIFT_VERSION/thrift-$THRIFT_VERSION.tar.gz" -o thrift.tar.gz
59+
mkdir -p /tmp/thrift
60+
tar zxf thrift.tar.gz -C /tmp/thrift --strip-components=1
61+
rm thrift.tar.gz
62+
cd /tmp/thrift
63+
./configure --without-python --without-cpp
64+
make
65+
sudo make install
66+
cd /
67+
rm -rf /tmp/thrift
68+
thrift --version
69+
- run:
70+
name: Install SBT
71+
command: |
72+
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
73+
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add
74+
sudo apt-get update
75+
sudo apt-get install -y sbt
76+
- run:
77+
name: Install Miniconda
78+
command: |
79+
wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
80+
bash ~/miniconda.sh -b -p $HOME/miniconda
81+
echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
82+
source $BASH_ENV
83+
conda create -y -n chronon_py python=3.7
84+
conda install -y -q -n chronon_py --no-deps virtualenv
85+
# Quote the nose specifier: unquoted, the shell parses ">=1.3.7" as a redirect to a file named "=1.3.7".
$HOME/miniconda/envs/chronon_py/bin/pip install \
86+
flake8==5.0.4 flake8-quotes==3.3.1 thrift==0.11.0 click==7.0 thrift_json==0.1.0 "nose>=1.3.7"
87+
$HOME/miniconda/envs/chronon_py/bin/pip install build
88+
- run:
89+
name: Install Mill with checksum verification
90+
command: |
91+
curl -L https://github.com/com-lihaoyi/mill/releases/download/0.10.15/0.10.15 -o mill
92+
echo "d90132b1a4ebe4d55d2bc43b3f18b5d6e8e3d12d89a83f83ad2276867e127916 mill" | sha256sum -c -
93+
chmod +x mill
94+
sudo mv mill /usr/local/bin/mill
95+
mill --version
1896
1997
jobs:
2098
"Pull Docker Image":
@@ -195,6 +273,64 @@ jobs:
195273
destination: spark_warehouse.tar.gz
196274
when: on_fail
197275

276+
"Mill -- Compile All":
277+
executor: modern_ubuntu_executor
278+
steps:
279+
- checkout
280+
- install_build_dependencies
281+
- run:
282+
name: Compile all modules with Mill
283+
shell: /bin/bash -leuxo pipefail
284+
command: |
285+
source $BASH_ENV
286+
conda activate chronon_py
287+
# Prepare scala version-specific sources for Mill
288+
mill api.prepareScalaSources
289+
# Compile all modules
290+
mill api.compile aggregator.compile online.compile spark.compile flink.compile
291+
292+
"Mill -- Tests":
293+
executor: modern_ubuntu_executor_xxlarge
294+
steps:
295+
- checkout
296+
- install_build_dependencies
297+
- run:
298+
name: Run all tests with Mill
299+
shell: /bin/bash -leuxo pipefail
300+
command: |
301+
source $BASH_ENV
302+
conda activate chronon_py
303+
mill api.prepareScalaSources
304+
mill api.test aggregator.test online.test spark.test flink.test
305+
- run:
306+
name: Compress spark-warehouse
307+
command: |
308+
cd /tmp/ && tar -czvf spark-warehouse.tar.gz chronon/spark-warehouse
309+
when: on_fail
310+
- store_artifacts:
311+
path: /tmp/spark-warehouse.tar.gz
312+
destination: spark_warehouse.tar.gz
313+
when: on_fail
314+
315+
"Mill -- Python API Build":
316+
executor: modern_ubuntu_executor
317+
steps:
318+
- checkout
319+
- install_build_dependencies
320+
- run:
321+
name: Build Python API with Mill
322+
shell: /bin/bash -leuxo pipefail
323+
command: |
324+
source $BASH_ENV
325+
conda activate chronon_py
326+
# Set project root for Mill commands
327+
export CHRONON_ROOT=/chronon
328+
# Generate Python Thrift and build wheel
329+
mill generatePythonThrift
330+
mill buildPythonApi
331+
- store_artifacts:
332+
path: /chronon/api/py/dist
333+
198334
workflows:
199335
build_test_deploy:
200336
jobs:
@@ -221,5 +357,14 @@ workflows:
221357
requires:
222358
- "Pull Docker Image"
223359
- "Scala 13 -- Iceberg Table Utils Tests":
360+
requires:
361+
- "Pull Docker Image"
362+
- "Mill -- Compile All":
363+
requires:
364+
- "Pull Docker Image"
365+
- "Mill -- Tests":
366+
requires:
367+
- "Pull Docker Image"
368+
- "Mill -- Python API Build":
224369
requires:
225370
- "Pull Docker Image"

aggregator/package.mill

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package build.aggregator
2+
3+
import mill._
4+
import mill.javalib._
5+
import mill.javalib.publish._
6+
import mill.scalalib.SbtModule
7+
8+
object `package` extends SbtModule with PublishModule {
9+
10+
def scalaVersion = "2.12.12"
11+
12+
def mvnDeps = Seq(
13+
mvn"com.google.code.gson:gson:2.8.6",
14+
mvn"com.yahoo.datasketches:sketches-core:0.13.4",
15+
mvn"org.apache.commons:commons-lang3:3.12.0"
16+
)
17+
18+
def moduleDeps = super.moduleDeps ++ Seq(build.api)
19+
20+
def compileMvnDeps = Seq(
21+
mvn"org.apache.spark::spark-core:3.1.1",
22+
mvn"org.apache.spark::spark-sql:3.1.1"
23+
)
24+
25+
def pomSettings = PomSettings(
26+
"Chronon is a feature engineering platform",
27+
"ai.chronon",
28+
"https://github.com/airbnb/chronon",
29+
Seq(License(
30+
"Apache 2",
31+
"Apache 2",
32+
"http://www.apache.org/licenses/LICENSE-2.0.txt",
33+
false,
34+
false,
35+
"repo"
36+
)),
37+
VersionControl(
38+
Some("https://github.com/airbnb/chronon"),
39+
Some("scm:[email protected]:airbnb/chronon.git"),
40+
None,
41+
None
42+
),
43+
Seq(Developer(
44+
"nikhilsimha",
45+
"Nikhil Simha",
46+
"http://nikhilsimha.com",
47+
None,
48+
None
49+
))
50+
)
51+
52+
def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"
53+
54+
object test extends SbtTests with TestModule.Junit4 {
55+
56+
def mvnDeps = Seq(
57+
mvn"junit:junit:4.13.2",
58+
mvn"com.novocode:junit-interface:0.11",
59+
mvn"org.scalatest::scalatest:3.2.15",
60+
mvn"org.apache.commons:commons-math3:3.6.1"
61+
)
62+
63+
def moduleDeps = super.moduleDeps ++ Seq(build.api.test)
64+
65+
def testSandboxWorkingDir = false
66+
def testParallelism = false
67+
68+
}
69+
}

api/package.mill

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package build.api
2+
3+
import mill._
4+
import mill.javalib._
5+
import mill.javalib.publish._
6+
import mill.scalalib.SbtModule
7+
8+
object `package` extends SbtModule with PublishModule {
9+
10+
def scalaVersion = "2.12.12"
11+
12+
def mvnDeps = Seq(
13+
mvn"org.apache.thrift:libthrift:0.13.0",
14+
mvn"org.scala-lang.modules::scala-collection-compat:2.6.0",
15+
mvn"org.scala-lang:scala-reflect:2.12.12"
16+
)
17+
18+
def compileMvnDeps = Seq(
19+
mvn"org.apache.spark::spark-core:3.1.1",
20+
mvn"org.apache.spark::spark-sql:3.1.1"
21+
)
22+
23+
def pomSettings = PomSettings(
24+
"Chronon is a feature engineering platform",
25+
"ai.chronon",
26+
"https://github.com/airbnb/chronon",
27+
Seq(License(
28+
"Apache 2",
29+
"Apache 2",
30+
"http://www.apache.org/licenses/LICENSE-2.0.txt",
31+
false,
32+
false,
33+
"repo"
34+
)),
35+
VersionControl(
36+
Some("https://github.com/airbnb/chronon"),
37+
Some("scm:[email protected]:airbnb/chronon.git"),
38+
None,
39+
None
40+
),
41+
Seq(Developer(
42+
"nikhilsimha",
43+
"Nikhil Simha",
44+
"http://nikhilsimha.com",
45+
None,
46+
None
47+
))
48+
)
49+
50+
def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"
51+
52+
/**
53+
* Prepare Scala version-specific sources for Mill compilation.
54+
*
55+
* Mill's security model prevents reading from scala-2.12/ during Task execution,
56+
* so we copy version-specific sources to the main scala/ directory before compilation.
57+
*
58+
* This is a one-time setup command - run it once per clean build:
59+
* ./mill api.prepareScalaSources
60+
*
61+
* SBT users don't need this - sbt automatically picks up scala-2.12/ directories.
62+
*/
63+
def prepareScalaSources() = Task.Command {
64+
val scala212Dir = millSourcePath0 / "src" / "main" / "scala-2.12"
65+
val scalaDir = millSourcePath0 / "src" / "main" / "scala"
66+
67+
os.walk(scala212Dir)
68+
.filter(_.ext == "scala")
69+
.foreach { sourceFile =>
70+
val relativePath = sourceFile.relativeTo(scala212Dir)
71+
val targetFile = scalaDir / relativePath
72+
os.makeDir.all(targetFile / os.up)
73+
os.copy.over(sourceFile, targetFile, createFolders = true)
74+
println(s"✓ Copied: ${sourceFile.last}")
75+
}
76+
println(s"✓ Scala 2.12 sources prepared for Mill compilation")
77+
()
78+
}
79+
80+
// Generate Java sources from Thrift
81+
def generatedSources = Task {
82+
val thriftFile = millSourcePath0 / "thrift" / "api.thrift"
83+
val outDir = Task.dest / "java"
84+
os.remove.all(outDir)
85+
os.makeDir.all(outDir)
86+
os.proc("thrift", "--gen", "java", "-out", outDir, thriftFile)
87+
.call(stdout = os.Inherit)
88+
89+
os.walk(outDir).filter(_.ext == "java").map(PathRef(_))
90+
}
91+
92+
object test extends SbtTests with TestModule.Junit4 {
93+
94+
def mvnDeps = Seq(
95+
mvn"com.novocode:junit-interface:0.11",
96+
mvn"junit:junit:4.13.2",
97+
mvn"org.scalatest::scalatest:3.2.15",
98+
mvn"org.scalatestplus::mockito-3-4:3.2.10.0"
99+
)
100+
101+
def testSandboxWorkingDir = false
102+
def testParallelism = false
103+
104+
}
105+
}

api/py/python-api-build.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,23 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
2626
echo "Removing old distributions..."
2727
rm -f $SCRIPT_DIR/dist/*
2828

29+
# Create and activate venv if it doesn't exist
30+
VENV_DIR="$SCRIPT_DIR/../../venv"
31+
if [[ ! -d "$VENV_DIR" ]]; then
32+
echo "Creating Python virtual environment..."
33+
python3 -m venv "$VENV_DIR"
34+
fi
35+
36+
# Activate venv
37+
echo "Activating Python virtual environment..."
38+
source "$VENV_DIR/bin/activate"
39+
40+
# Install build dependencies in venv if either build or twine is missing
41+
if ! python3 -c "import build, twine" 2>/dev/null; then
42+
echo "Installing Python build dependencies..."
43+
pip install --quiet build twine
44+
fi
45+
2946
# The default action is "build"
3047
if [[ -z "${ACTION}" ]] || [[ "${ACTION}" == "build" ]]; then
3148
PYPI_REPOSITORY="internal"

0 commit comments

Comments
 (0)