diff --git a/.gitignore b/.gitignore index faada9c8a..73df90509 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,8 @@ bld/ # Visual Studio 2015/2017 cache/options directory .vs/ +# Visual Studio Code cache/options directory +.vscode/ # Uncomment if you have tasks that create the project's static files in wwwroot #wwwroot/ diff --git a/binder/Dockerfile b/binder/Dockerfile new file mode 100644 index 000000000..cf9401047 --- /dev/null +++ b/binder/Dockerfile @@ -0,0 +1 @@ +FROM mcr.microsoft.com/dotnet-spark:2.4.6-0.12.1-interactive diff --git a/binder/README.md b/binder/README.md new file mode 100644 index 000000000..926671da2 --- /dev/null +++ b/binder/README.md @@ -0,0 +1,7 @@ +# .NET for Apache Spark Interactive + +This interactive notebook allows you to explore .NET for Apache Spark in your web-browser. + +To launch it, just click the button below: + +[![Binder](./dotnet-spark-binder.svg)](https://mybinder.org/v2/gh/indy-3rdman/spark/docker_images_init?urlpath=lab/tree/nb/) diff --git a/binder/dotnet-spark-binder.svg b/binder/dotnet-spark-binder.svg new file mode 100644 index 000000000..121abdcf8 --- /dev/null +++ b/binder/dotnet-spark-binder.svg @@ -0,0 +1 @@ +launchlaunchSpark.NET interactiveSpark.NET interactive diff --git a/docker/images/dev/Dockerfile b/docker/images/dev/Dockerfile new file mode 100644 index 000000000..c006abe01 --- /dev/null +++ b/docker/images/dev/Dockerfile @@ -0,0 +1,52 @@ +ARG SDK_IMAGE_TAG=3.1-bionic +FROM mcr.microsoft.com/dotnet/core/sdk:$SDK_IMAGE_TAG +LABEL maintainer="Martin Kandlbinder " + +ARG DEBIAN_FRONTEND=noninteractive +ARG HADOOP_VERSION=2.7 +ARG MAVEN_VERSION=3.6.3 +ARG SPARK_VERSION=3.0.1 + +ENV DAEMON_RUN=true \ + M2_HOME=/usr/local/bin/maven/current \ + SPARK_HOME=/spark +ENV PATH="${PATH}:${SPARK_HOME}/bin:${M2_HOME}/bin" + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-utils \ + ca-certificates \ + dialog \ + openjdk-8-jdk \ + software-properties-common \ + supervisor \ + 
unzip \ + wget \ + && add-apt-repository universe \ + && apt-get install -y apt-transport-https \ + && apt-get update \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /usr/local/bin/maven \ + && cd /usr/local/bin/maven \ + && echo "Downloading apache-maven-${MAVEN_VERSION}-bin.tar.gz ..." \ + && wget -q https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz \ + && tar -xvzf apache-maven-${MAVEN_VERSION}-bin.tar.gz \ + && ln -s apache-maven-${MAVEN_VERSION} current \ + && rm apache-maven-${MAVEN_VERSION}-bin.tar.gz \ + && git clone https://github.com/dotnet/spark.git ~/dotnet.spark \ + && mkdir -p ~/.nuget/NuGet && cp ~/dotnet.spark/NuGet.config ~/.nuget/NuGet/NuGet.Config + +RUN echo "Downloading spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz ..." \ + && wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ + && tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ + && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark \ + && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz + +RUN curl -fsSL https://code-server.dev/install.sh | sh \ + && mkdir -p ~/.config/code-server \ + && printf 'bind-addr: 0.0.0.0:8080\nauth: none\ncert: false\n' > ~/.config/code-server/config.yaml + +COPY supervisor.conf /etc/supervisor.conf + +CMD ["supervisord", "-c", "/etc/supervisor.conf"] diff --git a/docker/images/dev/README.md b/docker/images/dev/README.md new file mode 100644 index 000000000..af5145e29 --- /dev/null +++ b/docker/images/dev/README.md @@ -0,0 +1,87 @@ +# .NET for Apache Spark dev Docker image + +## Description + +This directory contains the source code to build a docker development image. +Using this image, you can compile .NET for Apache Spark yourself. 
+ +If you do not want to build the image yourself, you can get our pre-built images directly from docker hub at [https://hub.docker.com/r/3rdman/dotnet-spark](https://hub.docker.com/r/3rdman/dotnet-spark). + +## Building + +To build a dev image, just run the [build.sh](build.sh) bash script. The default Apache Spark and Maven versions used to build the image are defined in the script. + +You can also specify different versions by using one of the following command line options: + +```bash + -a, --apache-spark + -m, --maven +``` + +For more details, please run + +```bash +build.sh -h +``` + +Please note, however, that not all version combinations are supported. + +## Docker Run Example + +The dotnet-spark dev image can be used in multiple ways. Below is an example that might be useful. + +```bash +docker run --name dotnet-spark-dev -d 3rdman/dotnet-spark:dev-latest +``` + +## Using the image to build from source + +- ### Docker interactive terminal + +The most basic way to build .NET for Apache Spark from source is to connect to the started container using the following docker command: + +```bash +docker exec -it dotnet-spark-dev bash +``` + +As the image comes with a clone of the GitHub repository already, just navigate into the local git repository folder and ensure that you have the most recent version of the source available. + +```bash +cd ~/dotnet.spark +git pull +``` + +Now you can start with [Building Spark .NET Scala Extensions Layer](https://github.com/dotnet/spark/blob/master/docs/building/ubuntu-instructions.md#building). + +If you prefer a more GUI based approach, there are a couple of options available as well. + +- ### Using [code-server](https://github.com/cdr/code-server) + +The image comes with code-server installed, which allows you to run Visual Studio Code and build .NET for Apache Spark in a web-browser. 
+ +First, start a container from the dev image and map the code-server port to a host port that is reachable via the loopback address only. + +```bash +docker run --name dotnet-spark-dev -d -p 127.0.0.1:8888:8080 3rdman/dotnet-spark:dev-latest +``` + +![launch](img/dotnet-dev-docker-code-server-launch.gif) + +Now use a web-browser to connect to http://127.0.0.1:8888. This will open Visual Studio Code in the browser. After opening the dotnet.spark repository folder and a new Terminal window, you can switch to the Scala source folder and start the build process. + +```bash +cd src/scala +mvn clean package +``` + +![build](img/dotnet-dev-docker-code-server-build.gif) + +- ### VS Code & remote containers + +You may also want to give the Docker and Remote Containers extensions for Visual Studio Code a try. + +![vs-code-docker-extension](img/vscode-docker-extension.png) + +![vs-code-remote-containers-extension](img/vscode-remote-containers-extension.png) + +![build](img/dotnet-dev-docker-vscode-remote.gif) diff --git a/docker/images/dev/build.sh b/docker/images/dev/build.sh new file mode 100755 index 000000000..2e1d1704f --- /dev/null +++ b/docker/images/dev/build.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +# Create different versions of the .NET for Apache Spark dev docker image +# based on the Apache Spark and Maven versions. 
+ +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes + +readonly image_repository='3rdman' +readonly supported_apache_spark_versions=( + "2.3.0" "2.3.1" "2.3.2" "2.3.3" "2.3.4" + "2.4.0" "2.4.1" "2.4.3" "2.4.4" "2.4.5" "2.4.6" "2.4.7" + "3.0.0" "3.0.1" + ) +readonly supported_maven_versions=("3.6.3") +readonly hadoop_version=2.7 +readonly sdk_image_tag="3.1-bionic" + +maven_version=3.6.3 +apache_spark_version=3.0.1 + +main() { + # Parse the options and set the related variables; "${2:-}" hands a missing option value to the version check instead of aborting on nounset + while [[ "$#" -gt 0 ]]; do + case "${1}" in + -a|--apache-spark) opt_check_apache_spark_version "${2:-}"; shift ;; + -m|--maven) opt_check_maven_version "${2:-}"; shift ;; + -h|--help) print_help + exit 0 ;; + *) echo "Unknown parameter passed: ${1}"; exit 1 ;; + esac + shift + done + + echo "Building dev image with Apache Spark ${apache_spark_version} and Maven ${maven_version}" + + local image_name="${image_repository}/dotnet-spark:${apache_spark_version}-dev" + + build_image "${image_name}" + + exit 0 +} + +####################################### +# Checks if the provided Apache Spark version number is supported +# Arguments: +# The version number string (may be empty when the option value is missing) +# Result: +# Sets the global variable apache_spark_version if supported, +# otherwise prints a related message and exits with status 1 +####################################### +opt_check_apache_spark_version() { + local provided_version="${1}" + local valid_version="" value + + for value in "${supported_apache_spark_versions[@]}" + do + [[ "${provided_version}" = "$value" ]] && valid_version="${provided_version}" + done + + if [ -z "${valid_version}" ] + then + echo "${provided_version} is an unsupported Apache Spark version." 
+ exit 1 ; + else + apache_spark_version="${valid_version}" + fi +} + +####################################### +# Checks if the provided Maven version number is supported +# Arguments: +# The version number string (may be empty when the option value is missing) +# Result: +# Sets the global variable maven_version if supported, +# otherwise prints a related message and exits with status 1 +####################################### +opt_check_maven_version() { + local provided_version="${1}" + local valid_version="" value + + for value in "${supported_maven_versions[@]}" + do + [[ "${provided_version}" = "$value" ]] && valid_version="${provided_version}" + done + + if [ -z "${valid_version}" ] + then + echo "${provided_version} is an unsupported maven version." + exit 1 ; + else + maven_version="${valid_version}" + fi +} + + +####################################### +# Runs the docker build command with the related build arguments (passed as an array, one word per argument) +# Arguments: +# The image name (incl. tag) +# Result: +# A local docker image with the specified name +####################################### +build_image() { + local image_name="${1}" + local build_args=( + --build-arg "MAVEN_VERSION=${maven_version}" + --build-arg "SPARK_VERSION=${apache_spark_version}" + --build-arg "HADOOP_VERSION=${hadoop_version}" + --build-arg "SDK_IMAGE_TAG=${sdk_image_tag}") + + echo "Building ${image_name}" + + docker build "${build_args[@]}" -t "${image_name}" . +} + + +####################################### +# Display the help text +####################################### +print_help() { + cat <