#Copyright (c) 2015 Raytheon BBN Technologies # #Permission is hereby granted, free of charge, to any person obtaining #a copy of this software and/or hardware specification (the "Work") to #deal in the Work without restriction, including without limitation the #rights to use, copy, modify, merge, publish, distribute, sublicense, #and/or sell copies of the Work, and to permit persons to whom the Work #is furnished to do so, subject to the following conditions: # #The above copyright notice and this permission notice shall be #included in all copies or substantial portions of the Work. # #THE WORK IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS #OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, #WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #OUT OF OR IN CONNECTION WITH THE WORK OR THE USE OR OTHER DEALINGS #IN THE WORK. 
---
# Smoke-test playbook for a Hadoop cluster made of one `master` host and
# `worker-*` hosts. It runs three plays in order:
#   1. every host : the neuca post-boot script is present and has run;
#   2. workers    : the master is reachable by name;
#   3. master     : format HDFS, start DFS/YARN, then run a small put/cat
#                   test and a teragen/terasort job.
#
# Privilege escalation uses `become` throughout; the legacy play-level
# `sudo` keyword was removed in Ansible 2.9.
#
# Every task that sources ~/.bashrc pins `executable: /bin/bash`: the shell
# module runs commands through /bin/sh by default, and `source` is a bashism
# that is not guaranteed to exist there.

- name: test all hosts
  hosts: all
  become: true
  tasks:
    - name: neuca test
      shell: neuca-user-data
      register: neuca_test

    - debug:
        var: neuca_test.stdout_lines

    # The command echoes "yes"/"no"; the task fails on a "no" line.
    - name: test if the postboot script is there
      shell: >-
        if [ -f /home/hadoop/hadoop_config_dynamic.sh ] ;
        then echo "yes" ; else echo "no" ; fi
      register: file_test
      failed_when: "'no' in file_test.stdout_lines"

    - debug:
        var: file_test.stdout_lines

    # Looks for the marker line in the boot log; fails on a "no" line.
    - name: install script has run
      shell: >-
        if grep -q "Hello from neuca script" "/home/hadoop/hadoop_boot.log" ;
        then echo "yes" ; else echo "no" ; fi
      register: install_test
      failed_when: "'no' in install_test.stdout_lines"

    - debug:
        var: install_test.stdout_lines

- name: test workers
  # Quoted so the glob is never read as anything but a plain string.
  hosts: "worker-*"
  become: true
  tasks:
    - name: ping test
      command: ping -c 5 master
      register: ping_test

    - debug:
        var: ping_test.stdout_lines

- name: test master
  hosts: master
  become: true
  tasks:
    # Lists the account names from /etc/passwd and requires "hadoop" in them.
    - name: check hadoop user is present
      shell: "cut -d: -f1 /etc/passwd"
      register: user_test
      failed_when: "'hadoop' not in user_test.stdout"

    - debug:
        var: user_test.stdout

    # The task fails unless the NameNode shutdown message for
    # master/172.16.1.1 appears on stderr.
    - name: check formating the hadoop filesystem
      become: true
      become_user: hadoop
      shell: cd ~; source ~/.bashrc; hdfs namenode -format -force
      args:
        executable: /bin/bash
      register: hdfs_format
      failed_when: >-
        'SHUTDOWN_MSG: Shutting down NameNode at master/172.16.1.1'
        not in hdfs_format.stderr
      async: 600
      poll: 30

    - debug:
        var: hdfs_format.stderr

    - name: start dfs
      become: true
      become_user: hadoop
      shell: cd ~; source ~/.bashrc; start-dfs.sh
      args:
        executable: /bin/bash
      register: dfs_start
      async: 600
      poll: 10

    - debug:
        var: dfs_start.stderr

    - name: start yarn
      become: true
      become_user: hadoop
      shell: cd ~; source ~/.bashrc; start-yarn.sh
      args:
        executable: /bin/bash
      register: yarn_start
      async: 600
      poll: 10

    - debug:
        var: yarn_start.stdout

    - debug:
        var: yarn_start.stderr

    - name: hdfs report
      become: true
      become_user: hadoop
      shell: cd ~; source ~/.bashrc; hdfs dfsadmin -report
      args:
        executable: /bin/bash
      register: hdfs_report
      async: 600
      poll: 5

    - debug:
        var: hdfs_report.stdout

    - debug:
        var: hdfs_report.stderr

    # Writes a small file, puts it into HDFS, lists / and reads it back.
    # NOTE(review): `hdfs dfs -ls` usually pads its columns; confirm the
    # spacing inside the failed_when literal matches the real output.
    - name: small test
      become: true
      become_user: hadoop
      shell: >-
        cd ~; source ~/.bashrc;
        echo Hello GENI World > /tmp/hello.txt;
        hdfs dfs -put /tmp/hello.txt /hello.txt;
        hdfs dfs -ls /;
        hdfs dfs -cat /hello.txt
      args:
        executable: /bin/bash
      register: small_test
      async: 600
      poll: 5
      failed_when: "'-rw-r--r-- 2 hadoop supergroup' not in small_test.stdout"

    - debug:
        var: small_test.stdout

    - debug:
        var: small_test.stderr

    # Need to activate async when ansible 2.0 is out and fixes:
    # https://github.com/ansible/ansible/issues/9798
    # XXX When this is done, remove the timeout from the hadoop shell command.
    - name: generate dataset
      become: true
      become_user: hadoop
      shell: >-
        cd ~; source ~/.bashrc;
        timeout 600 hadoop jar
        /home/hadoop/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar
        teragen 10000000 /input
      args:
        executable: /bin/bash
      # async: 600
      # poll: 20
      register: gen_dataset
      failed_when: "'Bytes Written=1000000000' not in gen_dataset.stderr"

    - debug:
        var: gen_dataset.stderr

    - debug:
        var: gen_dataset.stdout

    # Need to activate async when ansible 2.0 is out and fixes:
    # https://github.com/ansible/ansible/issues/9798
    # XXX When this is done, remove the timeout from the hadoop shell command.
    - name: sort dataset
      become: true
      become_user: hadoop
      shell: >-
        cd ~; source ~/.bashrc;
        timeout 600 hadoop jar
        /home/hadoop/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar
        terasort /input /output
      args:
        executable: /bin/bash
      register: sort_dataset
      # async: 600
      failed_when: "'INFO terasort.TeraSort: done' not in sort_dataset.stderr"

    - debug:
        var: sort_dataset.stderr

    # Lists /input and /output and prints the head of one part file from
    # each; this task has no failed_when, its output is only displayed.
    - name: verify files
      become: true
      become_user: hadoop
      shell: >-
        cd ~; source ~/.bashrc;
        hdfs dfs -ls /input/;
        hdfs dfs -ls /output/;
        hdfs dfs -cat /input/part-m-00000 | head;
        hdfs dfs -cat /output/part-r-00000 | head
      args:
        executable: /bin/bash
      register: verify_files
      async: 600
      poll: 10

    - debug:
        var: verify_files.stdout

    - debug:
        var: verify_files.stderr

    # Need to activate async when ansible 2.0 is out and fixes:
    # https://github.com/ansible/ansible/issues/9798
    # XXX When this is done, remove the timeout from the shell execution.
    - name: hexdump
      become: true
      become_user: hadoop
      shell: >-
        cd ~; source ~/.bashrc;
        timeout 600 hdfs dfs -get /output/part-r-00000 /tmp/part-r-00000;
        hexdump /tmp/part-r-00000 | head
      args:
        executable: /bin/bash
      register: hexdump
      # async: 600
      # poll: 30

    - debug:
        var: hexdump.stdout

    - debug:
        var: hexdump.stderr