# Install Apache Hadoop 2.6.0 on a CS2.1-SSD Cloudlet ($.023/hr) running Linux Ubuntu Server 14.04 LTS 64-bit
cloudscript hadoop_single_node
    version                 = _latest
    result_template         = hadoop_result_template
globals
    hadoop_hostname         = 'hadoop'
    hadoop_instance_type    = 'CS2.1-SSD'   # 2GB RAM, 1 vCore(s), 37GB SSD, 10Gbps
    hadoop_image_type       = 'Ubuntu Server 14.04 LTS'
    hadoop_version          = 'hadoop-2.6.0'
    server_password         = lib::random_password()
    console_password        = lib::random_password()
    hadoop_slice_user       = 'hadoop'
thread hadoop_setup
    tasks                   = [hadoop_node_setup]
task hadoop_node_setup

    #-------------------------------
    # create hadoop keys
    #-------------------------------

    # create hadoop server root password key
    /key/password hadoop_server_password_key read_or_create
        key_group           = _SERVER
        password            = server_password

    # create hadoop server console key
    /key/password hadoop_server_console_key read_or_create
        key_group           = _CONSOLE
        password            = console_password

    # create storage slice keys
    /key/token hadoop_slice_key read_or_create
        username            = hadoop_slice_user

    #-------------------------------
    # create hadoop bootstrap
    # script and recipe
    #-------------------------------

    # create slice to store script in cloudstorage
    /storage/slice hadoop_slice read_or_create
        keys                = [hadoop_slice_key]

    # create slice container to store script in cloudstorage
    /storage/container hadoop_container => [hadoop_slice] read_or_create
        slice               = hadoop_slice

    # place script data in cloudstorage
    /storage/object hadoop_bootstrap_object => [hadoop_slice, hadoop_container] read_or_create
        container_name      = 'hadoop_container'
        file_name           = 'bootstrap_hadoop.sh'
        slice               = hadoop_slice
        content_data        = hadoop_bootstrap_data

    # associate the cloudstorage object with the hadoop script
    /orchestration/script hadoop_bootstrap_script => [hadoop_slice, hadoop_container, hadoop_bootstrap_object] read_or_create
        data_uri            = 'cloudstorage://hadoop_slice/hadoop_container/bootstrap_hadoop.sh'
        script_type         = _SHELL
        encoding            = _STORAGE

    # create the recipe and associate the script
    /orchestration/recipe hadoop_bootstrap_recipe read_or_create
        scripts             = [hadoop_bootstrap_script]

    #-------------------------------
    # create the hadoop node
    #-------------------------------

    /server/cloud hadoop_server read_or_create
        hostname            = '{{ hadoop_hostname }}'
        image               = '{{ hadoop_image_type }}'
        service_type        = '{{ hadoop_instance_type }}'
        keys                = [hadoop_server_password_key, hadoop_server_console_key]
        recipes             = [hadoop_bootstrap_recipe]
text_template hadoop_bootstrap_data
#!/bin/sh
#-------------------------------
# Verify the prerequisites
# before installing Hadoop.
#-------------------------------
test `whoami` = 'root' || { echo "You must be root to execute the commands."; exit 1; }
# Update the source list
apt-get update
# The OpenJDK project is the default version of Java
# that is provided from a supported Ubuntu repository.
apt-get install default-jdk -y
# add a dedicated hadoop group and user, then set the password with chpasswd
# (useradd does not read a password from stdin)
addgroup hadoop
useradd -g hadoop -m -s /bin/bash hduser
echo "hduser:{{ hadoop_server_password_key.password }}" | chpasswd
# install SSH
apt-get install ssh -y
# create an SSH keypair for hduser and authorize it for localhost logins
su - hduser -c 'ssh-keygen -t rsa -N "" -f $HOME/.ssh/id_rsa'
su - hduser -c 'cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys'
# accept host keys automatically so the hadoop start scripts can
# ssh to localhost without an interactive prompt
sed -r 's/^#?[[:space:]]*StrictHostKeyChecking.*/    StrictHostKeyChecking no/' /etc/ssh/ssh_config > /etc/ssh/ssh_conf
mv /etc/ssh/ssh_conf /etc/ssh/ssh_config
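# optional sanity check (run manually): with the keypair and config above in
# place, this should return without a password or host-key prompt
#   su - hduser -c 'ssh localhost true'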
# download and install hadoop; archive.apache.org retains all past releases,
# unlike rotating mirrors that drop old versions
wget https://archive.apache.org/dist/hadoop/common/{{ hadoop_version }}/{{ hadoop_version }}.tar.gz
tar xzf {{ hadoop_version }}.tar.gz
mv {{ hadoop_version }} /usr/local/hadoop
chown -R hduser:hadoop /usr/local/hadoop
# prepare initial config: derive JAVA_HOME by resolving the java alternatives
# symlink (non-interactive, unlike `update-alternatives --config`)
JAVA_PATH=`readlink -f /usr/bin/java | sed -r 's@/jre/bin/java$@@'`
echo "export JAVA_HOME=$JAVA_PATH" >> /home/hduser/.bashrc
cat <<\EOC>>/home/hduser/.bashrc
export HADOOP_INSTALL=/usr/local/hadoop
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib"
EOC
sed -r "s@export JAVA_HOME=.*@export JAVA_HOME=$JAVA_PATH@g" /usr/local/hadoop/etc/hadoop/hadoop-env.sh > /usr/local/hadoop/etc/hadoop/hadoop-env_tmp.sh
mv /usr/local/hadoop/etc/hadoop/hadoop-env_tmp.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh
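# optional sanity check (run manually): the hadoop wrapper should now run
# under the configured JAVA_HOME
#   su - hduser -c '/usr/local/hadoop/bin/hadoop version'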
mkdir -p /usr/local/hadoop/app/hadoop/tmp
chown hduser:hadoop /usr/local/hadoop/app/hadoop/tmp
cat <<\EOC>/usr/local/hadoop/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/app/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:54310</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>
EOC
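# NOTE: fs.default.name is the legacy alias of fs.defaultFS; Hadoop 2.6
# still honors it and maps it to the new name automatically.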
cat <<\EOC>/usr/local/hadoop/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:54311</value>
<description>The host and port that the MapReduce job tracker runs
at. If "local", then jobs are run in-process as a single map
and reduce task.
</description>
</property>
</configuration>
EOC
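# NOTE: mapred.job.tracker is an MRv1 property. With the YARN daemons started
# below, MapReduce jobs are scheduled through YARN only if
# mapreduce.framework.name is set to 'yarn'; otherwise they use the local runner.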
mkdir -p /usr/local/hadoop_store/hdfs/namenode
mkdir -p /usr/local/hadoop_store/hdfs/datanode
chown -R hduser:hadoop /usr/local/hadoop_store
cat <<\EOC>/usr/local/hadoop/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/datanode</value>
</property>
</configuration>
EOC
# format the new HDFS filesystem
su - hduser -c "/usr/local/hadoop/bin/hdfs namenode -format"
# start the HDFS and YARN daemons
su - hduser -c "/usr/local/hadoop/sbin/start-dfs.sh"
su - hduser -c "/usr/local/hadoop/sbin/start-yarn.sh"
# automate hadoop startup at boot via /etc/rc.local
sed -r 's@exit 0@su - hduser -c "/usr/local/hadoop/sbin/start-dfs.sh"@g' /etc/rc.local > /etc/rc.local_tmp
mv /etc/rc.local_tmp /etc/rc.local
echo 'su - hduser -c "/usr/local/hadoop/sbin/start-yarn.sh"' >> /etc/rc.local
echo 'exit 0' >> /etc/rc.local
_eof
text_template hadoop_result_template
Thank you for provisioning a single-node Hadoop setup.

You can log in to the server directly via SSH by connecting to
root@{{ hadoop_server.ipaddress_public }} using the password:

{{ hadoop_server_password_key.password }}
You can also access the status of your HDFS cluster
on the web at the following URL:
http://{{ hadoop_server.ipaddress_public }}:50070/
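
The YARN ResourceManager web UI (default port 8088) is at:

http://{{ hadoop_server.ipaddress_public }}:8088/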
_eof