Short Description
Docker image including Hadoop, Spark, and Scala.
Full Description

FROM openjdk:8

######## SCALA / SPARK ########

# Scala-related variables.
ARG SCALA_VERSION=2.12.2
ARG SCALA_BINARY_ARCHIVE_NAME=scala-${SCALA_VERSION}
ARG SCALA_BINARY_DOWNLOAD_URL=http://downloads.lightbend.com/scala/${SCALA_VERSION}/${SCALA_BINARY_ARCHIVE_NAME}.tgz

# SBT-related variables.
ARG SBT_VERSION=0.13.15
ARG SBT_BINARY_ARCHIVE_NAME=sbt-${SBT_VERSION}
ARG SBT_BINARY_DOWNLOAD_URL=https://dl.bintray.com/sbt/native-packages/sbt/${SBT_VERSION}/${SBT_BINARY_ARCHIVE_NAME}.tgz

# Spark-related variables.
ARG SPARK_VERSION=2.2.0
ARG SPARK_BINARY_ARCHIVE_NAME=spark-${SPARK_VERSION}-bin-hadoop2.7
ARG SPARK_BINARY_DOWNLOAD_URL=http://d3kbcqa49mib13.cloudfront.net/${SPARK_BINARY_ARCHIVE_NAME}.tgz

# Configure environment variables for Scala, SBT and Spark.
# Also extend PATH to include the binary folders of Java, Scala, SBT and Spark (bin and sbin).
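# Note: JAVA_HOME itself is not set in this Dockerfile; it is expected to be
# provided by the openjdk:8 base image.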
ENV SCALA_HOME /usr/local/scala
ENV SBT_HOME /usr/local/sbt
ENV SPARK_HOME /usr/local/spark
ENV PATH $JAVA_HOME/bin:$SCALA_HOME/bin:$SBT_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH

# Download, uncompress and move all the required packages and libraries to their corresponding directories in /usr/local/ folder.
RUN apt-get -yqq update && \
    apt-get install -yqq vim screen tmux && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /tmp/* && \
    wget -qO - ${SCALA_BINARY_DOWNLOAD_URL} | tar -xz -C /usr/local/ && \
    wget -qO - ${SBT_BINARY_DOWNLOAD_URL} | tar -xz -C /usr/local/ && \
    wget -qO - ${SPARK_BINARY_DOWNLOAD_URL} | tar -xz -C /usr/local/ && \
    cd /usr/local/ && \
    ln -s ${SCALA_BINARY_ARCHIVE_NAME} scala && \
    ln -s ${SPARK_BINARY_ARCHIVE_NAME} spark && \
    cp spark/conf/log4j.properties.template spark/conf/log4j.properties && \
    mkdir -p /tmp/spark-events && \
    sed -i -e 's/WARN/ERROR/g' spark/conf/log4j.properties && \
    sed -i -e 's/INFO/ERROR/g' spark/conf/log4j.properties
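
# Quick sanity check (not part of the build): run these inside a container
# started from this image; each command should report the version pinned by
# the ARGs above.
#   scala -version
#   spark-submit --version
#   sbt sbtVersion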

######## HADOOP ########

# https://hadoop.apache.org/docs/r3.0.0/
# https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-common/ClusterSetup.html
ADD hadoop-3.0.0 /hadoop-3.0.0
ENV HADOOP_HOME /hadoop-3.0.0
RUN chmod +x $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN $HADOOP_HOME/etc/hadoop/hadoop-env.sh
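# Note: environment variables exported by hadoop-env.sh in the RUN step above
# do not persist into the final image; the script would need to be sourced
# again at container runtime (or its settings promoted to ENV instructions).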
ENV PATH $PATH:$HADOOP_HOME/bin
# for namenode
ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
# for datanode
ADD hdfs-site.xml.template $HADOOP_HOME/etc/hadoop/hdfs-site.xml.template
# for namenode & datanode
ADD core-site.xml.template $HADOOP_HOME/etc/hadoop/core-site.xml.template
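
# Example (not part of the build): once the *.template files have been
# rendered into hdfs-site.xml / core-site.xml inside a container, a namenode
# could be bootstrapped roughly as follows (paths and ports depend on the
# templates, which are not shown in this description):
#   hdfs namenode -format
#   hdfs --daemon start namenode
#   hdfs dfsadmin -report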

######## APPLICATION ########

ADD MapR_fs_job-0.0.1-SNAPSHOT.jar /MapR_fs_job-0.0.1-SNAPSHOT.jar

CMD ["/bin/bash"]

Owner: activeeon