Public Repository

Last pushed: 3 years ago
Short Description
SSH-accessible container with Spark binaries, for data scientists and developers
Full Description


# SSH-accessible workstation image layered on a prebuilt Spark base.
# NOTE(review): the base publishes only :latest; pin a digest for reproducible builds.
FROM drpaulbrewer/spark-roasted-elephant:latest
# Install sshd, dev tools, and the Python scientific stack in ONE layer:
# `update` and `install` must share a layer (otherwise a cached, stale package
# index is reused), recommends are skipped, and the apt lists are removed in
# the same layer so the cleanup actually shrinks the image.
RUN apt-get update && apt-get install --yes --no-install-recommends \
        apg \
        build-essential \
        clang-3.6 \
        emacs24-nox \
        git \
        iputils-ping \
        less \
        openssh-server \
        python-numpy \
        python-scipy \
        scons \
        screen \
        sudo \
    && rm -rf /var/lib/apt/lists/*
# sshd refuses to start without its privilege-separation directory.
RUN mkdir -p /var/run/sshd
# NOTE(review): the original ADD lines lost their source filenames when this
# page was scraped ("ADD /spark/" is not valid syntax). COPY is preferred over
# ADD for plain local files; restore the real script name(s) before building.
COPY spark-start.sh /spark/
# NO -- Give user spark full sudo rights
# RUN /usr/sbin/usermod -aG sudo spark
# YES -- Give user spark ability to run "sudo apt-get" without a password.
RUN echo "spark ALL=NOPASSWD: /usr/bin/apt-get" >>/etc/sudoers
# Foreground startup script keeps sshd as the container's main process.
# TODO confirm the script filename — lost in the page scrape.
CMD ["/bin/bash","/spark/spark-start.sh"]

The container includes a startup script under /spark/ (the script's filename was lost when this page was captured):

Note: environment variable $SPARK_WSP will become the spark ssh user password

# Container entry script: optionally scrubs /etc/hosts, sets the spark user's
# password from $SPARK_WSP, writes Spark environment exports to the spark
# user's profile, then runs sshd in the foreground to keep the container alive.
#
# Fixes vs. the posted version: the test was written `== "1"]` — the missing
# space before `]` is a hard syntax error, `==` is a bashism inside `[`, and
# the `if` was never closed with `fi`.
if [ "$remove_hostname_from_etc_hosts" = "1" ]
then
    /spark/remove-top-etc-hosts
    echo "removed top line from /etc/hosts"
fi
# $SPARK_WSP becomes the spark ssh user's password.
echo spark:$SPARK_WSP | chpasswd
echo export SPARK_LOCAL_IP=$SPARK_LOCAL_IP >>/spark/.profile
echo export PATH=$SPARKDIR/bin:$PATH >>/spark/.profile
echo export MASTER=$master >>/spark/.profile
# -D keeps sshd in the foreground so the container does not exit.
/usr/sbin/sshd -D

Usage script for the HOST machine.
The password shown below is a placeholder; change the password and fill in the IP addresses before use.


# Refresh sudo credentials up front so the commands below do not stall
# on a password prompt mid-script.
sudo -v
# Start the workstation container and capture its ID. NOTE(review): the
# --env values `master=spark://` and `SPARK_LOCAL_IP=` are incomplete —
# the real master URL and IP were stripped from this page; fill them in,
# and change SPARK_WSP (it becomes the ssh password for user `spark`).
SPARK=$(docker run --name="sparkworkstation" --expose=1-65535 --env SPARKDIR=/spark/spark-1.3.1 --env master=spark:// --env SPARK_LOCAL_IP= --env SPARK_WSP="THIS_IS_THE_SSH_PASSWORD" -v /data:/data -v /tmp:/tmp  -d drpaulbrewer/spark-workstation:latest)
# Attach a host network interface to the container via pipework.
# NOTE(review): pipework normally takes an IP/CIDR after the container ID —
# that argument appears to have been stripped; confirm against pipework docs.
sudo pipework eth0 $SPARK
Docker Pull Command