[[PageOutline]]

= Slice notes =

Here are some notes about things I like to do when I set up a slice, many of which I learned from Plastic Slices.

= Choose what to use =

== Slices ==

Use one of these to identify which slices to use:

{{{
slices=$(echo gpoI{15,16})
}}}

{{{
slices=$(echo ps{103..110})
}}}

{{{
slices=$(echo ps{101..102})
}}}

== Logins ==

Use one of these to create a file and set some variables containing the logins you want to use:

{{{
logins cat
}}}

{{{
logins egrep -h bbn
}}}

{{{
logins egrep -h -- '(-ig-|-pg-)'
}}}

Those create ~/tmp/logins-$slicename.txt for each slice, and put all the logins into $logins. They rely on a shell function like this:

{{{
logins ()
{
    for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; $* ~/slices/*/logins/logins-$slicename.txt >| $loginfile ; done ;
    logins=$(for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; cat $loginfile ; done) ;
}
}}}

== rspecs ==

Always do this:

{{{
declare -A rspecs
}}}

Then use something like one of these to identify which rspecs to use:

{{{
for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec) ; done
}}}

{{{
for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep -v openflow) ; done
}}}

{{{
for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | egrep '(bbn|utah)') ; done
}}}

Check what you've got:

{{{
for slicename in $slices ; do echo ${rspecs[$slicename]} ; done
}}}

== Credentials ==

Fetch my user and slice credentials:

{{{
(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)
}}}

= Slice and sliver stuff =

== Create and renew slices ==

{{{
for slicename in $slices ; do omni createslice $slicename ; done

renewdate='2014-05-15 23:00 UTC'
for slicename in $slices ; do omni renewslice $slicename "$renewdate" ; done
}}}

== Create and renew slivers ==

{{{
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am createsliver $slicename $rspec & done ; sleep 30s ; done

renewdate='2014-05-15 23:00 UTC'
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
}}}

=== Utah-specific hackeration ===

At the various Utah aggregates, you can only renew slivers for up to five days. These days, we have gpoI15 and gpoI16 slivers that won't expire until March of 2015, so we don't need to do this any more!
But back when we had to renew things every four days, here's how I used to do it:

{{{
slices=$(echo ps{103..110} gpoI{15,16})

(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)

declare -A rspecs
for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep utah | egrep -v '(openflow|vts)') ; done
for slicename in $slices ; do echo ${rspecs[$slicename]} ; done

renewdate="$(date +%Y-%m-%d -d 'now + 4 days') 23:00 UTC"
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
}}}

=== ExoGENI-specific hackeration ===

{{{
slices=$(echo ps{103..110} gpoI{15,16})

(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)

declare -A rspecs
for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep exogeni | grep -v openflow) ; done
for slicename in $slices ; do echo ${rspecs[$slicename]} ; done

renewdate="$(date +%Y-%m-%d -d 'now + 13 days') 23:00 UTC"
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
}}}

== Check sliver expiration ==

For a few slivers, you can just run the commands and eyeball the results as they fly past. For a lot of slivers, you can stash the results in some files and then analyze them afterwards.

=== For a few slivers ===

{{{
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename |& grep _expir || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
}}}

=== For lots of slivers ===

Gather up expiration information, and stuff it into a results file:

{{{
for slicename in $slices
do
  cd
  rm -rf ~/tmp/renewsliver/$slicename
  mkdir -p ~/tmp/renewsliver/$slicename
  cd ~/tmp/renewsliver/$slicename
  for rspec in ${rspecs[$slicename]} ; do outfile=$(echo $(basename $rspec) | sed -e 's/.rspec$//') ; somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename >& $outfile ; done
  cd ~/tmp/renewsliver/$slicename
  grep -h _expires * >> results.txt
  for i in * ; do grep _expires $i > /dev/null || echo "no 'expires' lines in $i" ; done >> results.txt
done
}}}

Look for anomalies in the results files:

{{{
cd ~/tmp/renewsliver
for slicename in $slices ; do echo "==> $slicename" ; grep foam_expires $slicename/results.txt ; done
for slicename in $slices ; do echo "==> $slicename" ; grep orca_expires $slicename/results.txt ; done
for slicename in $slices ; do echo "==> $slicename" ; grep pg_expires $slicename/results.txt ; done
for slicename in $slices ; do echo "==> $slicename" ; grep "no 'expires' lines" $slicename/results.txt ; done
}}}

If you find anomalies, you'll probably need to go back to the original output files to figure out where they came from.
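For example, something like this will list which raw output files (and thus which aggregates) mention a suspicious expiration date; this is just a sketch, and the date here is a placeholder for whatever looked wrong in results.txt:

{{{
cd ~/tmp/renewsliver
grep -l '2014-04-20' */* | grep -v results.txt
}}}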
== Check sliver status ==

{{{
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename |& grep _status || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
}}}

== Get VTS pseudowire IDs ==

{{{
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am listresources $slicename |& grep shared_lan || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
}}}

== Get login info ==

At this point, I'm storing these files in Subversion too, in .../ssh_config, so change into the relevant directory for the collection (jbsN, infra4, plastic-slices, etc.) first, e.g.

{{{
cd ~/slices/plastic-slices/ssh_config
}}}

Then, get a file per slice with readyToLogin:

{{{
export PYTHONPATH=~/src/gcf-current/src

for slicename in $slices ; do ams="" ; for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; ams="$ams -a $am" ; done ; readyToLogin --no-keys --output --prefix=$slicename --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml $ams $slicename ; done

for slicename in $slices ; do mv -f $slicename-sshconfig.txt $slicename ; rm -f $slicename*.xml $slicename*.json $slicename-logininfo.txt ; done
}}}

I should have symlinks in ~/.ssh/config.d pointing to these, so I then just run 'sshconf' to rebuild ~/.ssh/config.

== Delete slivers ==

{{{
for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am deletesliver $slicename & done ; sleep 5s ; done
}}}

= Login stuff =

== Essential one-time steps for any login ==

Find old SSH keys for IP addresses that ExoGENI has reused, and print lines to remove them:

{{{
logins grep -- -eg-
for login in $logins ; do ssh $login true |& grep ssh-keygen | sed -e 's/remove with://' ; done
}}}

Copy and paste the output (simply exec-ing it doesn't seem to work, and I haven't debugged why); then repeat the above and expect no output.

Make sure I can log in, and that each login's hostname is as expected:

{{{
logins cat
shmux -c "hostname" $logins | egrep -v '(.+): \1'
}}}

Expect no output from that, except possibly messages about new SSH keys. Run it again in that case, and address any other output.

Copy in my dotfiles:

{{{
for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/.cfhome/ '' ; done
}}}

(FIXME: This next thing doesn't really belong here, but I haven't figured out where else to put it.)

Check for errors from a prsync command:

{{{
for slicename in $slices ; do export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; for file in ~/tmp/prsync-errors/$slicename/* ; do test -s $file && echo "==> $file" && cat $file && echo "" ; done ; done
}}}

= Pingtest stuff =

There are two ways to do this: a "fast" way that works OK for small numbers of hosts but not for lots of hosts at once, and a "reliable" way that works consistently even with large numbers of hosts.

== Install files ==

The first of these is needed for both ways; the last two are only needed for the "reliable" way.
{{{
for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/slices/*/reachability/addrs-$slicename.conf pingtest.conf ; done

shmux -c 'mkdir -p bin' $logins

for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/subversion/syseng/geni/share/experiment-setup/plastic-slices/reachability/pingtest bin/pingtest ; done
}}}

== The fast way ==

This way uses fping, which is very fast, but seems to be unreliable, more so when there are large numbers of hosts. (I think the parallelizing is overwhelming something -- something OpenFlow-related? A switch? My controller? With ARPs? Hard to tell without digging deeper than I have.) It's still useful for a quick one-host-at-a-time check, but the comprehensive pingtest is pretty unlikely to work at any scale greater than a few hosts.

=== Run a fast pingtest on one host ===

{{{
fping -q -c 10 < pingtest.conf |& grep -v "ICMP Host Unreachable"
}}}

=== Run a comprehensive pingtest ===

{{{
cd
rm -rf ~/tmp/pingtest
mkdir -p ~/tmp/pingtest
cd ~/tmp/pingtest

for login in $logins ; do ssh -n $login 'fping -q -c 10 < pingtest.conf |& grep -v "ICMP Host Unreachable"' > $login && echo $login & done
}}}

=== Analyze the results ===

Show everything that isn't up (i.e. that doesn't have exactly 0% packet loss):

{{{
grep -v "/0%" *
}}}

Show only things that are 100% down:

{{{
grep "/100%" *
}}}

Show only things that have some packet loss other than 0% or 100%:

{{{
egrep -v "(/0%|/100%)" *
}}}

Show everything that isn't up, but exclude things that are known to be down (in this example, Washington and Stanford):

{{{
grep -v "/0%" * | egrep -v "(10.42.[0-9]+.(8[01]|9[01])|(washington|stanford))"
}}}

Show everything that is "up" (0% packet loss):

{{{
grep "/0%" *
}}}

== The reliable way ==

This way uses a script that pings one host at a time, which takes longer, but seems to be very reliable.

=== Run a comprehensive pingtest ===

{{{
cd
rm -rf ~/tmp/pingtest
mkdir -p ~/tmp/pingtest
cd ~/tmp/pingtest

for login in $logins ; do ssh -n $login pingtest '$(cat pingtest.conf)' > $login && echo $login & done
}}}

=== Analyze the results ===

Show everything that isn't "up" (0% packet loss):

{{{
grep -v "is up" *
}}}

Show only things that are "down" (100% packet loss):

{{{
grep "is down" *
}}}

Show only things that are "partial" (some packet loss, but not 100%):

{{{
grep "is partial" *
}}}

Show everything that isn't "up", but exclude things that are known to be down (in this example, Washington and Stanford):

{{{
grep -v "is up" * | egrep -v "(10.42.[0-9]+.(8[01]|9[01])|(washington|stanford))"
}}}

= GEMINI stuff =

Initialize:

{{{
for slicename in $slices ; do ./gdesktop-init.py --certificate=~/.ssl/jbs\@ch.geni.net.pem --project=JBS --slicename=$slicename ; done
}}}

Instrumentize:

{{{
for slicename in $slices ; do ./gdesktop-instrumentize.py --certificate=~/.ssl/jbs\@ch.geni.net.pem --project=JBS --slicename=$slicename ; done
}}}

= Remote commands =

Here are some examples of using shmux to run commands remotely on all logins.

== Install fping and iperf ==

{{{
shmux -c 'sudo $PKGMGR -y install fping iperf' $logins
}}}

== See what version of iperf is installed ==

{{{
shmux -c 'pkgversion iperf' $logins
}}}

= Controller stuff =

I generally do this stuff on naxos.

== Run NOX 'switch' with a LAVI interface ==

This runs NOX with the 'switch' module on port 33xxx, and a LAVI interface on port 11xxx.
{{{
subnet=017
port=33$subnet ; (cd /usr/bin && /usr/bin/nox_core --info=/home/jbs/nox/nox-${port}.info -i ptcp:$port switch lavi_switches jsonmessenger=tcpport=11$subnet,sslport=0)
}}}

== Get a list of DPIDs from NOX ==

This uses a script from Aaron Rosen (from http://paste.pocoo.org/show/555163/) to talk to LAVI and print a list of DPIDs (and pipes them to sort).

{{{
subnet=017 ; nox-console -n localhost -p 11$subnet getnodes | sort
}}}

== Get a list of DPIDs from rspecs ==

If you have a directory full of rspecs, you can print a list of DPIDs that they include, in the same format as the output from nox-console. (Note that this is pretty naive, and doesn't try to do things like avoid commented-out things, etc.)

{{{
cd ~/rspecs/request/jbs15
grep -h component_id openflow-* | esed -e 's/.+datapath\+([^"]+).+$/\1/' | sort
}}}

= Old stuff =

Stuff I was doing at one point, but am not any more.

= MyPLC stuff =

== Essential one-time steps for each MyPLC login ==

Set the hostname:

{{{
for login in $myplclogins ; do ssh $login sudo hostname $login ; done
}}}

Enable cron and start it up:

{{{
shmux -c 'sudo chkconfig crond on && sudo service crond status | grep running || sudo service crond start' $myplclogins
}}}

You should only have to do each of those once, but they're safe to repeat.

= wapgnode stuff =

== Specify which VLAN to fiddle with ==

Do this before doing any of the other things in this section.

{{{
VLAN=
}}}

== Install and start using a new ifcfg-eth1.VLAN file ==

{{{
for host in $logins ; do rscp ~/tmp/emulab-wapg-vlan-testing/ifcfg-eth1-$host $host:ifcfg-eth1.$VLAN ; done
shmux -c "sudo cp ifcfg-eth1.$VLAN /etc/sysconfig/network-scripts" $logins
shmux -c "sudo ifup eth1.$VLAN" $logins
shmux -c "ifconfig eth1.$VLAN" $logins
}}}

== Stop using and delete an old interface ==

{{{
shmux -c "sudo ifdown eth1.$VLAN" $logins
shmux -c "sudo rm /etc/sysconfig/network-scripts/ifcfg-eth1.$VLAN" $logins
}}}

== Configure FlowVisor ==

{{{
slicename=jbs17
subnet=017

mkdir -p ~/$slicename/flowvisor
cd ~/$slicename/flowvisor
fvconfig generate flowvisor-$slicename.xml naxos $(cat /etc/flowvisor/fvpasswd) 33$subnet 8$subnet
cd

flowvisor ~/$slicename/flowvisor/flowvisor-$slicename.xml >> ~/$slicename/flowvisor/flowvisor-$slicename.log 2>&1 &

fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet setConfig 'flowvisor!log_ident' flowvisor$subnet

for slice in $(fvctl --url=https://localhost:8$subnet --passwd-file=/etc/flowvisor/fvpasswd listSlices |& grep Slice | grep -v root | awk '{ print $3; }') ; do fvctl --url=https://localhost:8$subnet --passwd-file=/etc/flowvisor/fvpasswd deleteSlice $slice ; done

fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet createSlice $slicename tcp:localhost:42$subnet jbs@bbn.com
fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet addFlowSpace any 100 dl_type=0x800,nw_dst=10.42.$subnet.0/24 "Slice:$slicename=4"
fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet addFlowSpace any 100 dl_type=0x800,nw_src=10.42.$subnet.0/24 "Slice:$slicename=4"
}}}

== Run a FlowVisor on port 33xxx ==

{{{
flowvisor ~/$slicename/flowvisor/flowvisor-$slicename.xml >> ~/$slicename/flowvisor/flowvisor-$slicename.log 2>&1 &
}}}

== Run NOX on port 42xxx ==

{{{
port=42017 ; (cd /usr/bin && /usr/bin/nox_core --info=/home/jbs/$slicename/nox/nox-${port}.info -i ptcp:$port switch)
}}}

== Talk to a FlowVisor on a specific port ==

{{{
fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet listFlowSpace
}}}

== Kill all the FlowVisors that you own ==

{{{
pkill -u $USER -f "org.flowvisor.FlowVisor"
}}}
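If you want to double-check that they're all gone afterwards, something along these lines (a minimal sketch, using pgrep with the same match as the pkill above) should report that nothing is left running:

{{{
pgrep -u $USER -f -l "org.flowvisor.FlowVisor" || echo "no FlowVisors running"
}}}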