Public Repository

Last pushed: 2 years ago
Short Description
ssh accessible container with spark binaries for data scientists/developers
Full Description

Dockerfile

# Base image supplies the Spark binaries.
# NOTE(review): :latest is not reproducible — pin a versioned tag/digest when one is known.
FROM drpaulbrewer/spark-roasted-elephant:latest
# MAINTAINER is deprecated (hadolint DL4000); use a LABEL instead.
LABEL maintainer="drpaulbrewer@eaftc.com"
# Install sshd plus developer tooling in ONE layer:
# - update+install combined so the apt cache is never stale,
# - --no-install-recommends and list cleanup keep the image small,
# - sudo folded in here instead of a second RUN layer,
# - packages sorted alphabetically for diffability.
RUN apt-get update && apt-get --yes install --no-install-recommends \
      apg \
      build-essential \
      clang-3.6 \
      emacs24-nox \
      git \
      iputils-ping \
      less \
      openssh-server \
      python-numpy \
      python-scipy \
      scons \
      screen \
      sudo \
    && rm -rf /var/lib/apt/lists/*
# sshd refuses to start without its privilege-separation directory.
RUN mkdir -p /var/run/sshd
# COPY preferred over ADD for plain local files (hadolint DL3020).
COPY remove-top-etc-hosts.sh /spark/
COPY start-spark-workstation.sh /spark/
# NO -- Give user spark full sudo rights
# RUN /usr/sbin/usermod -aG sudo spark
# YES -- Give user spark ability to run "sudo apt-get" only
RUN echo "spark ALL=NOPASSWD: /usr/bin/apt-get" >>/etc/sudoers
# Exec-form CMD so the startup script is PID 1's direct child of bash.
CMD ["/bin/bash","/spark/start-spark-workstation.sh"]

Container included script /spark/start-spark-workstation.sh

Note: environment variable $SPARK_WSP will become the spark ssh user password

#!/bin/bash
# Container startup: optionally trims the top line of /etc/hosts, sets the
# "spark" user's ssh password from $SPARK_WSP, records runtime environment
# exports in /spark/.profile, then runs sshd in the foreground so the
# container stays alive.

# BUG FIX: original test read `"1"]` with no space before `]`, which is a
# bash syntax error at runtime — the branch could never execute.
if [ "$remove_hostname_from_etc_hosts" == "1" ]
then
    # BUG FIX: the Dockerfile installs this helper as
    # /spark/remove-top-etc-hosts.sh (with the .sh suffix); the original
    # called it without the suffix and would fail with "command not found".
    /spark/remove-top-etc-hosts.sh
    echo "removed top line from /etc/hosts"
fi
# $SPARK_WSP becomes the ssh password for user "spark" (see usage notes).
echo "spark:$SPARK_WSP" | chpasswd
# Persist the runtime configuration so interactive ssh login shells see it.
# Values are expanded NOW (at container start), matching original behavior.
echo "export SPARK_LOCAL_IP=$SPARK_LOCAL_IP" >>/spark/.profile
echo "export PATH=$SPARKDIR/bin:$PATH" >>/spark/.profile
echo "export MASTER=$master" >>/spark/.profile
# -D keeps sshd in the foreground as the container's main process.
/usr/sbin/sshd -D

Usage script for the HOST machine.
The password shown here is only a placeholder; change the password and the IP addresses to match your environment before running it.

./run-docker-spark-workstation

#!/bin/bash
# Host-side launcher: starts the spark workstation container detached,
# then attaches a static IP on the host's eth0 via pipework.
# CHANGE the password and the IP addresses below before use.

# Refresh sudo credentials up front so the pipework call does not stall
# mid-script waiting for a password prompt.
sudo -v
# Start the container and capture its id for pipework.
SPARK=$(docker run --name="sparkworkstation" \
    --expose=1-65535 \
    --env SPARKDIR=/spark/spark-1.3.1 \
    --env master=spark://192.168.1.10:7077 \
    --env SPARK_LOCAL_IP=192.168.1.17 \
    --env SPARK_WSP="THIS_IS_THE_SSH_PASSWORD" \
    -v /data:/data \
    -v /tmp:/tmp \
    -d drpaulbrewer/spark-workstation:latest)
# ROBUSTNESS: bail out if docker run failed — the original would have
# invoked pipework with an empty container id.
if [ -z "$SPARK" ]; then
    echo "docker run failed: no container id" >&2
    exit 1
fi
# Quote the id to prevent word splitting.
sudo pipework eth0 "$SPARK" 192.168.1.17/24@192.168.1.1
Docker Pull Command
Owner
drpaulbrewer