Public Repository

Last pushed: 2 years ago
Short Description
Ubuntu 14.10, Hadoop 2.6.0 (single-node setup), Hive 1.0
Full Description

Hadoop 2.6.0 / hive 1.0 / ubuntu 14.10

Pull the image

sudo docker pull cdecl/hadoop-hive:2.6.0

Start a container

sudo docker run -i -t  cdecl/hadoop-hive:2.6.0 /etc/bootstrap.sh -bash

root@0bfa9279e282:/# jps
129 NameNode
243 DataNode
392 SecondaryNameNode
617 NodeManager
938 Jps
527 ResourceManager

Testing


# /root/data/cities.csv 
ls /root/data

# mysql start 
# root password: hivepass
service mysql restart  

# hive start 
hive

# create a table for the csv file
hive> CREATE TABLE cities (country string, r1 string, r2 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

# load data from local file 
hive> LOAD DATA LOCAL INPATH '/root/data/cities.csv' OVERWRITE INTO TABLE cities;

# describe the table
hive> DESCRIBE cities;
OK
country                 string
r1                      string
r2                      string

# select top 4 rows 
hive> SELECT * FROM cities LIMIT 4;
OK
Afghanistan     Badaẖšan        Eškašem
Afghanistan     Badaẖšan        Fayzābād
Afghanistan     Badaẖšan        H̱andūd
Afghanistan     Badaẖšan        Jurm

# select row count (expected result: 317102)
hive> SELECT COUNT(*) FROM cities;

# select grouping data 
hive> SELECT country, COUNT(*) cnt FROM cities GROUP BY country;

# dfs command 
hive> dfs -ls /user/hive/warehouse/cities ;

Docker Pull Command
Owner
cdecl