Changes between Initial Version and Version 1 of JBSsandbox/SliceNotes


Ignore:
Timestamp:
03/18/14 21:14:00 (10 years ago)
Author:
Josh Smift
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • JBSsandbox/SliceNotes

    v1 v1  
     1[[PageOutline]]
     2
     3= Slice notes =
     4
     5Here are some notes about things I like to do when I set up a slice, many of which I learned from Plastic Slices.
     6
     7= Choose what to use =
     8
     9== Slices ==
     10
     11Use one of these to identify which slices to use:
     12
     13{{{
     14slices=$(echo gpoI{15,16})
     15}}}
     16
     17{{{
     18slices=$(echo ps{103..110})
     19}}}
     20
     21{{{
     22slices=$(echo ps{101..102})
     23}}}
     24
     25== Logins ==
     26
     27Use one of these to create a file and set some variables containing the logins you want to use:
     28
     29{{{
     30logins cat
     31}}}
     32
     33{{{
     34logins egrep -h bbn
     35}}}
     36
     37{{{
     38logins egrep -h -- '(-ig-|-pg-)'
     39}}}
     40
     41Those create ~/tmp/logins-$slicename.txt for each slice, and put all the logins into $logins. They rely on a shell function like this:
     42
     43{{{
     44logins () { for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; $* ~/slices/*/logins/logins-$slicename.txt >| $loginfile ; done ; logins=$(for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; cat $loginfile ; done) ; }
     45}}}
     46
     47== rspecs ==
     48
     49Always do this:
     50
     51{{{
     52declare -A rspecs
     53}}}
     54
     55Then use something like one of these to identify which rspecs to use:
     56
     57{{{
     58for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec) ; done
     59}}}
     60
     61{{{
     62for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep -v openflow) ; done
     63}}}
     64
     65{{{
     66for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | egrep '(bbn|utah)') ; done
     67}}}
     68
     69Check what you've got:
     70
     71{{{
     72for slicename in $slices ; do echo ${rspecs[$slicename]} ; done
     73}}}
     74
     75== Credentials ==
     76
     77Fetch my user and slice credentials:
     78
     79{{{
     80(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)
     81}}}
     82
     83= Slice and sliver stuff =
     84 
     85== Create and renew slices ==
     86
     87{{{
     88for slicename in $slices ; do omni createslice $slicename ; done
     89
     90renewdate='2014-05-15 23:00 UTC'
     91for slicename in $slices ; do omni renewslice $slicename "$renewdate" ; done
     92}}}
     93 
     94== Create and renew slivers ==
     95
     96{{{
     97for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am createsliver $slicename $rspec & done ; sleep 30s ; done
     98
     99renewdate='2014-05-15 23:00 UTC'
     100for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
     101}}}
     102
     103=== Utah-specific hackeration ===
     104
     105{{{
     106slices=$(echo ps{103..110} gpoI{15,16})
     107(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)
     108declare -A rspecs
     109for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep utah | grep -v openflow) ; done
     110for slicename in $slices ; do echo ${rspecs[$slicename]} ; done
     111
     112renewdate="$(date +%Y-%m-%d -d 'now + 4 days') 23:00 UTC"
     113for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
     114}}}
     115
     116=== ExoGENI-specific hackeration ===
     117
     118{{{
     119slices=$(echo ps{103..110} gpoI{15,16})
     120(cd ~/.gcf ; omni getusercred -o ; for slicename in $slices ; do omni getslicecred $slicename -o ; done)
     121declare -A rspecs
     122for slicename in $slices ; do rspecs[$slicename]=$(ls -1 ~/rspecs/request/$slicename/*.rspec | grep exogeni | grep -v openflow) ; done
     123for slicename in $slices ; do echo ${rspecs[$slicename]} ; done
     124
     125renewdate="$(date +%Y-%m-%d -d 'now + 13 days') 23:00 UTC"
     126for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am renewsliver $slicename "$renewdate" & done ; sleep 5s ; done
     127}}}
     128
     129== Check sliver expiration ==
     130
     131For a few slivers, you can just run the commands and eyeball the results as they fly past. For a lot of slivers, you can stash the results in some files and then analyze them afterwards.
     132
     133=== For a few slivers ===
     134
     135{{{
     136for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename |& grep _expir || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
     137}}}
     138
     139=== For lots of slivers ===
     140
     141Gather up expiration information, and stuff it into a results file:
     142
     143{{{
     144for slicename in $slices
     145do
     146  cd
     147  rm -rf ~/tmp/renewsliver/$slicename
     148  mkdir -p ~/tmp/renewsliver/$slicename
     149  cd ~/tmp/renewsliver/$slicename
     150  for rspec in ${rspecs[$slicename]} ; do outfile=$(echo $(basename $rspec) | sed -e 's/.rspec$//') ; somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename >& $outfile ; done
     151  cd ~/tmp/renewsliver/$slicename
     152  grep -h _expires * >> results.txt
     153  for i in * ; do grep _expires $i > /dev/null || echo "no 'expires' lines in $i" ; done >> results.txt
     154done
     155}}}
     156
     157Look for anomalies in the results files:
     158
     159{{{
     160cd ~/tmp/renewsliver
     161for slicename in $slices ; do echo "==> $slicename" ; grep foam_expires $slicename/results.txt ; done
     162for slicename in $slices ; do echo "==> $slicename" ; grep orca_expires $slicename/results.txt ; done
     163for slicename in $slices ; do echo "==> $slicename" ; grep pg_expires $slicename/results.txt ; done
     164for slicename in $slices ; do echo "==> $slicename" ; grep "no 'expires' lines" $slicename/results.txt ; done
     165}}}
     166
     167If you find anomalies, you'll probably need to go back to the original output files to figure out where they came from.
     168
     169== Check sliver status ==
     170
     171{{{
     172for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am sliverstatus $slicename |& grep _status || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
     173}}}
     174
     175== Get VTS pseudowire IDs ==
     176
     177{{{
     178for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am listresources $slicename |& grep shared_lan || echo "no sliver" ; echo "is for $slicename @ $am" ; done ; done
     179}}}
     180
     181== Get login info ==
     182
     183At this point, I'm storing these files in Subversion too, in .../ssh_config, so change into the relevant directory for the collection (jbsN, infra4, plastic-slices, etc) first.
     184
     185Then, get a file per slice with readyToLogin:
     186
     187{{{
     188export PYTHONPATH=~/src/gcf-current/src
     189for slicename in $slices ; do ams="" ; for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; ams="$ams -a $am" ; done ; readyToLogin --no-keys --output --prefix=$slicename --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml $ams $slicename ; done
     190for slicename in $slices ; do mv -f $slicename-sshconfig.txt $slicename ; rm -f $slicename*.xml $slicename*.json $slicename-logininfo.txt ; done
     191}}}
     192
     193I should have symlinks in ~/.ssh/config.d pointing to these, so I then just run 'sshconf' to rebuild ~/.ssh/config.
     194
     195== Delete slivers ==
     196
     197{{{
     198for slicename in $slices ; do for rspec in ${rspecs[$slicename]} ; do somni $slicename $rspec ; omni --usercredfile=$HOME/.gcf/$USER-geni-usercred.xml --slicecredfile=$HOME/.gcf/$slicename-cred.xml -a $am deletesliver $slicename & done ; sleep 5s ; done
     199}}}
     200
     201= Login stuff =
     202
     203== Essential one-time steps for any login ==
     204
     205Find old SSH keys, and print lines to remove them:
     206
     207{{{
     208for login in $logins ; do ssh $login true |& grep ssh-keygen | sed -e 's/remove with://' ; done
     209}}}
     210
     211(Repeat that until it returns with no output.)
     212
     213Make sure I can log in:
     214
     215{{{
     216shmux -c "hostname" $logins
     217}}}
     218
     219Copy in my dotfiles:
     220
     221{{{
     222for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/.cfhome/ '' ; done
     223}}}
     224
     225(FIXME: This next thing doesn't really belong here, but I haven't figured out where else to put it.)
     226
     227Check for errors from a prsync command:
     228
     229{{{
     230for slicename in $slices ; do export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; for file in ~/tmp/prsync-errors/$slicename/* ; do test -s $file && echo "==> $file" && cat $file && echo "" ; done  ; done
     231}}}
     232
     233= Pingtest stuff =
     234
     235There are two ways to do this, one "fast" one that works ok for small numbers of hosts but not for lots of hosts at once, and one "reliable" one that works consistently even with large numbers of hosts.
     236
     237== Install files ==
     238
     239The first of these is for both ways; the last two are only needed for "the reliable way".
     240
     241{{{
     242for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/slices/*/reachability/addrs-$slicename.conf pingtest.conf ; done
     243shmux -c 'mkdir -p bin' $logins
     244for slicename in $slices ; do loginfile=~/tmp/logins-$slicename.txt ; export PSSH_ERRDIR=~/tmp/prsync-errors/$slicename ; prsync -h $loginfile -a ~/subversion/syseng/geni/share/experiment-setup/plastic-slices/reachability/pingtest bin/pingtest ; done
     245}}}
     246
     247== The fast way ==
     248
     249This way uses fping, which is very fast, but seems to be unreliable, more so when there are large numbers of hosts. (I think the parallelizing is overwhelming something -- something OpenFlow-related? A switch? My controller? With ARPs? Hard to tell without digging deeper than I have.)
     250
     251It's still useful for a quick one-host-at-a-time check, but the comprehensive pingtest is pretty unlikely to work at any scale greater than a few hosts.
     252
     253=== Run a fast pingtest on one host ===
     254
     255{{{
     256fping -q -c 10 < pingtest.conf |& grep -v "ICMP Host Unreachable"
     257}}}
     258
     259=== Run a comprehensive pingtest ===
     260
     261{{{
     262cd
     263rm -rf ~/tmp/pingtest
     264mkdir -p ~/tmp/pingtest
     265cd ~/tmp/pingtest
     266for login in $logins ; do ssh -n $login 'fping -q -c 10 < pingtest.conf |& grep -v "ICMP Host Unreachable"' > $login && echo $login & done
     267}}}
     268
     269=== Analyze the results ===
     270
     271Show everything that isn't up (i.e. that doesn't have exactly 0% packet loss):
     272
     273{{{
     274grep -v "/0%" *
     275}}}
     276
     277Show only things that are 100% down:
     278
     279{{{
     280grep "/100%" *
     281}}}
     282
     283Show only things that have some packet loss other than 0% or 100%:
     284
     285{{{
     286egrep -v "(/0%|/100%)" *
     287}}}
     288
     289Show everything that isn't up, but exclude things that are known to be down (in this example, Washington and Stanford):
     290
     291{{{
     292grep -v "/0%" * | egrep -v "(10.42.[0-9]+.(8[01]|9[01])|(washington|stanford))"
     293}}}
     294
     295Show everything that is "up" (0% packet loss):
     296
     297{{{
     298grep "/0%" *
     299}}}
     300
     301== The reliable way ==
     302
     303This way uses a script that pings one host at a time, which takes longer, but seems to be very reliable.
     304
     305=== Run a comprehensive pingtest ===
     306
     307{{{
     308cd
     309rm -rf ~/tmp/pingtest
     310mkdir -p ~/tmp/pingtest
     311cd ~/tmp/pingtest
     312for login in $logins ; do ssh -n $login pingtest '$(cat pingtest.conf)' > $login && echo $login & done
     313}}}
     314
     315=== Analyze the results ===
     316
     317Show everything that isn't "up" (0% packet loss):
     318 
     319{{{
     320grep -v "is up" *
     321}}}
     322 
     323Show only things that are "down" (100% packet loss):
     324 
     325{{{
     326grep "is down" *
     327}}}
     328 
     329Show only things that are "partial" (some packet loss, but not 100%):
     330 
     331{{{
     332grep "is partial" *
     333}}}
     334 
     335Show everything that isn't "up", but exclude things that are known to be down (in this example, Washington and Stanford):
     336 
     337{{{
     338grep -v "is up" * | egrep -v "(10.42.[0-9]+.(8[01]|9[01])|(washington|stanford))"
     339}}}
     340
     341= GEMINI stuff =
     342
     343Initialize:
     344
     345{{{
     346for slicename in $slices ; do ./gdesktop-init.py --certificate=~/.ssl/jbs\@ch.geni.net.pem --project=JBS --slicename=$slicename ; done
     347}}}
     348
     349Instrumentize:
     350
     351{{{
     352for slicename in $slices ; do ./gdesktop-instrumentize.py --certificate=~/.ssl/jbs\@ch.geni.net.pem --project=JBS --slicename=$slicename ; done
     353}}}
     354
     355= Remote commands =
     356
     357Here are some examples of using shmux to run commands remotely on all logins.
     358
     359== Install fping and iperf ==
     360
     361{{{
     362shmux -c 'sudo $PKGMGR -y install fping iperf' $logins
     363}}}
     364
     365== See what version of iperf is installed ==
     366
     367{{{
     368shmux -c 'pkgversion iperf' $logins
     369}}}
     370
     371= Controller stuff =
     372
     373I generally do this stuff on naxos.
     374
     375== Run NOX 'switch' with a LAVI interface ==
     376
     377This runs NOX with the 'switch' module on port 33xxx, and a LAVI interface on port 11xxx.
     378
     379{{{
     380subnet=017
     381port=33$subnet ; (cd /usr/bin && /usr/bin/nox_core --info=/home/jbs/nox/nox-${port}.info -i ptcp:$port switch lavi_switches jsonmessenger=tcpport=11$subnet,sslport=0)
     382}}}
     383
     384== Get a list of DPIDs from NOX ==
     385
     386This uses a script from Aaron Rosen (from http://paste.pocoo.org/show/555163/) to talk to LAVI and print a list of DPIDs (and pipes them to sort).
     387
     388{{{
     389subnet=017 ; nox-console -n localhost -p 11$subnet getnodes | sort
     390}}}
     391
     392== Get a list of DPIDs from rspecs ==
     393
     394If you have a directory full of rspecs, you can print a list of DPIDs that they include, in the same format as the output from nox-console. (Note that this is pretty naive, and doesn't try to do things like avoid commented-out things, etc.)
     395
     396{{{
     397cd ~/rspecs/request/jbs15
     398grep -h component_id openflow-* | esed -e 's/.+datapath\+([^"]+).+$/\1/'|sort
     399}}}
     400
     401= Old stuff =
     402
     403Stuff I was doing at one point, but am not any more.
     404
     405= MyPLC stuff =
     406
     407== Essential one-time steps for each MyPLC login ==
     408
     409Set the hostname:
     410
     411{{{
     412for login in $myplclogins ; do ssh $login sudo hostname $login ; done
     413}}}
     414
     415Enable cron and start it up:
     416
     417{{{
     418shmux -c 'sudo chkconfig crond on && sudo service crond status | grep running || sudo service crond start' $myplclogins
     419}}}
     420
     421You should only have to do each of those once, but they're safe to repeat.
     422
     423= wapgnode stuff =
     424
     425== Specify which VLAN to fiddle with ==
     426
     427Do this before doing any of the other things in this section.
     428
     429{{{
     430VLAN=<vlan-id>
     431}}}
     432
     433== Install and start using a new ifcfg-eth1.VLAN file ==
     434
     435{{{
     436for host in $logins ; do rscp ~/tmp/emulab-wapg-vlan-testing/ifcfg-eth1-$host $host:ifcfg-eth1.$VLAN ; done
     437shmux -c "sudo cp ifcfg-eth1.$VLAN /etc/sysconfig/network-scripts" $logins
     438shmux -c "sudo ifup eth1.$VLAN" $logins
     439shmux -c "ifconfig eth1.$VLAN" $logins
     440}}}
     441
     442== Stop using and delete an old interface ==
     443
     444{{{
     445shmux -c "sudo ifdown eth1.$VLAN" $logins
     446shmux -c "sudo rm /etc/sysconfig/network-scripts/ifcfg-eth1.$VLAN" $logins
     447}}}
     448
     449== Configure FlowVisor ==
     450
     451{{{
     452slicename=jbs17
     453subnet=017
     454
     455mkdir -p ~/$slicename/flowvisor
     456cd ~/$slicename/flowvisor
     457fvconfig generate flowvisor-$slicename.xml naxos $(cat /etc/flowvisor/fvpasswd) 33$subnet 8$subnet
     458
     459cd
     460flowvisor ~/$slicename/flowvisor/flowvisor-$slicename.xml >> ~/$slicename/flowvisor/flowvisor-$slicename.log 2>&1 &
     461fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet setConfig 'flowvisor!log_ident' flowvisor$subnet
     462for slice in $(fvctl --url=https://localhost:8$subnet --passwd-file=/etc/flowvisor/fvpasswd listSlices |& grep Slice | grep -v root | awk '{ print $3; }') ; do fvctl --url=https://localhost:8$subnet --passwd-file=/etc/flowvisor/fvpasswd deleteSlice $slice ; done
     463fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet createSlice $slicename tcp:localhost:42$subnet jbs@bbn.com
     464fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet addFlowSpace any 100 dl_type=0x800,nw_dst=10.42.$subnet.0/24 "Slice:$slicename=4"
     465fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet addFlowSpace any 100 dl_type=0x800,nw_src=10.42.$subnet.0/24 "Slice:$slicename=4"
     466}}}
     467
     468== Run a FlowVisor on port 33xxx ==
     469
     470{{{
     471flowvisor ~/$slicename/flowvisor/flowvisor-$slicename.xml >> ~/$slicename/flowvisor/flowvisor-$slicename.log 2>&1 &
     472}}}
     473
     474== Run NOX on port 42xxx ==
     475
     476{{{
     477port=42017 ; (cd /usr/bin && /usr/bin/nox_core --info=/home/jbs/$slicename/nox/nox-${port}.info -i ptcp:$port switch)
     478}}}
     479
     480== Talk to a FlowVisor on a specific port ==
     481
     482{{{
     483fvctl --passwd-file=/etc/flowvisor/fvpasswd --url=https://localhost:8$subnet listFlowSpace
     484}}}
     485
     486== Kill all the FlowVisors that you own ==
     487
     488{{{
     489pkill -u $USER -f "org.flowvisor.FlowVisor"
     490}}}