Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -9,10 +9,11 @@ http-transport.scm nmsg-transport.scm filedb.scm \ client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \ tree.scm ezsteps.scm lock-queue.scm sdb.scm \ rmt.scm api.scm tdb.scm rpc-transport.scm \ portlogger.scm archive.scm +MTQA_FOSSIL=$(HOME)/fossils/megatest_qa.fossil # Eggs to install (straightforward ones) EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \ dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \ json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \ @@ -153,12 +154,19 @@ $(PREFIX)/bin/newdashboard $(PREFIX)/bin/mdboard $(PREFIX)/bin/.$(ARCHSTR) : mkdir -p $(PREFIX)/bin/.$(ARCHSTR) -test: tests/tests.scm - cd tests;csi -I .. -b -n tests.scm +test: ext-tests/.fslckout + cd ext-tests;csi -I .. -b -n tests.scm + +ext-tests/.fslckout : $(MTQA_FOSSIL) + mkdir -p ext-tests + cd ext-tests;fossil open --nested $(MTQA_FOSSIL) + +$(MTQA_FOSSIL) : + fossil clone https://www.kiatoa.com/fossils/megatest_qa $(MTQA_FOSSIL) clean : rm -f $(OFILES) $(GOFILES) megatest dboard dboard.o megatest.o dashboard.o # Deploy section (not complete yet) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -462,11 +462,13 @@ (system (conc "cd " rundir ";mt_xterm -T \"" (string-translate testfullname "()" " ") "\" " shell "&"))) (message-window (conc "Directory " rundir " not found"))))) (widgets (make-hash-table)) (refreshdat (lambda () - (let* ((curr-mod-time (file-modification-time db-path)) + (let* ((curr-mod-time (if (file-exists? db-path) + (file-modification-time db-path) + 0)) ;; (max ..... (if (file-exists? testdat-path) ;; (file-modification-time testdat-path) ;; (begin ;; (set! testdat-path (conc rundir "/testdat.db")) ;; 0)))) Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -1485,13 +1485,16 @@ (handle-exceptions exn (begin (debug:print 0 "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn)) (current-seconds)) ;; something went wrong - just print an error and return current-seconds - (apply max (map (lambda (filen) - (file-modification-time filen)) - (glob (conc *dbdir* "/*.db")))))) + (let ((dbfiles (map (lambda (filen) + (file-modification-time filen)) + (glob (conc *dbdir* "/*.db"))))) + (if (not (null? dbfiles)) + (apply max dbfiles) + 9e99)))) (define (dashboard:run-update x) (let* ((modtime (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*)) (monitor-modtime (if (file-exists? *monitor-db-path*) (file-modification-time *monitor-db-path*) ADDED docs/manual/client.ps Index: docs/manual/client.ps ================================================================== --- /dev/null +++ docs/manual/client.ps @@ -0,0 +1,693 @@ +%!PS-Adobe-3.0 +%%Creator: graphviz version 2.38.0 (20140413.2041) +%%Title: G +%%Pages: (atend) +%%BoundingBox: (atend) +%%EndComments +save +%%BeginProlog +/DotDict 200 dict def +DotDict begin + +/setupLatin1 { +mark +/EncodingVector 256 array def + EncodingVector 0 + +ISOLatin1Encoding 0 255 getinterval putinterval +EncodingVector 45 /hyphen put + +% Set up ISO Latin 1 character encoding +/starnetISO { + dup dup findfont dup length dict begin + { 1 index /FID ne { def }{ pop pop } ifelse + } forall + /Encoding EncodingVector def + currentdict end definefont +} def +/Times-Roman starnetISO def +/Times-Italic starnetISO def +/Times-Bold starnetISO def +/Times-BoldItalic starnetISO def +/Helvetica starnetISO def +/Helvetica-Oblique starnetISO def +/Helvetica-Bold starnetISO def +/Helvetica-BoldOblique starnetISO def +/Courier starnetISO def +/Courier-Oblique starnetISO def +/Courier-Bold starnetISO def +/Courier-BoldOblique starnetISO def +cleartomark +} bind def + +%%BeginResource: procset graphviz 0 0 +/coord-font-family /Times-Roman def +/default-font-family /Times-Roman def +/coordfont coord-font-family findfont 8 scalefont def + +/InvScaleFactor 1.0 def +/set_scale { + dup 1 exch div /InvScaleFactor exch def + scale +} bind def + +% styles +/solid { [] 0 setdash } bind def +/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def +/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def +/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def +/bold { 2 setlinewidth } bind def +/filled { } bind def +/unfilled { } bind def +/rounded { } bind def +/diagonals { } bind def +/tapered { } bind def + +% hooks for setting color +/nodecolor { sethsbcolor } bind def +/edgecolor { sethsbcolor } bind def +/graphcolor { sethsbcolor } bind def +/nopcolor {pop pop pop} bind def + +/beginpage { % i j npages + /npages exch def + /j exch def + /i exch def + /str 10 string def + npages 1 gt { + gsave + coordfont setfont + 0 0 moveto + (\() show i str cvs show (,) show j str cvs show (\)) show + grestore + } if +} bind def + +/set_font { + findfont exch + scalefont setfont +} def + +% draw text fitted to its expected width +/alignedtext { % width text + /text exch def + /width exch def + gsave + width 0 gt { + [] 0 setdash + text stringwidth pop width exch sub text length div 0 text ashow + } if + grestore +} def + +/boxprim { % xcorner ycorner xsize ysize + 4 2 roll + moveto + 2 copy + exch 0 rlineto + 0 exch rlineto + pop neg 0 rlineto + closepath +} bind def + +/ellipse_path { + /ry exch def + /rx exch def + /y exch def + /x exch def + matrix currentmatrix + newpath + x y translate + rx ry scale + 0 0 1 0 360 arc + setmatrix +} bind def + +/endpage { showpage } bind def +/showpage { } def + +/layercolorseq + [ % layer color sequence - darkest to lightest + [0 0 0] + [.2 .8 .8] + [.4 .8 .8] + [.6 .8 .8] + [.8 .8 .8] + ] +def + +/layerlen layercolorseq length def + +/setlayer {/maxlayer exch def /curlayer exch def + layercolorseq curlayer 1 sub layerlen mod get + aload pop sethsbcolor + /nodecolor {nopcolor} def + /edgecolor {nopcolor} def + /graphcolor {nopcolor} def +} bind def + +/onlayer { curlayer ne {invis} if } def + +/onlayers { + /myupper exch def + /mylower exch def + curlayer mylower lt + curlayer myupper gt + or + {invis} if +} def + +/curlayer 0 def + +%%EndResource +%%EndProlog +%%BeginSetup +14 default-font-family set_font +1 setmiterlimit +% /arrowlength 10 def +% /arrowwidth 5 def + +% make sure pdfmark is harmless for PS-interpreters other than Distiller +/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse +% make '<<' and '>>' safe on PS Level 1 devices +/languagelevel where {pop languagelevel}{1} ifelse +2 lt { + userdict (<<) cvn ([) cvn load put + userdict (>>) cvn ([) cvn load put +} if + +%%EndSetup +setupLatin1 +%%Page: 1 1 +%%PageBoundingBox: 36 36 450 550 +%%PageOrientation: Portrait +0 0 1 beginpage +gsave +36 36 414 514 boxprim clip newpath +1 1 set_scale 0 rotate 40 40 translate +% cluster_2 +gsave +1 setlinewidth +0.33333 1 1 graphcolor +newpath 8 8 moveto +8 498 lineto +341 498 lineto +341 8 lineto +closepath stroke +0 0 0 graphcolor +14 /Times-Roman set_font +143 482.8 moveto 63 (client:setup) alignedtext +grestore +% cluster_3 +gsave +1 setlinewidth +0 0 0 graphcolor +newpath 16 342 moveto +16 467 lineto +166 467 lineto +166 342 lineto +closepath stroke +grestore +% client:setup start +gsave +0 0 0.82745 nodecolor +newpath 282 386 moveto +176 386 lineto +176 350 lineto +282 350 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 282 386 moveto +176 386 lineto +176 350 lineto +282 350 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +184 364.3 moveto 90 (client:setup start) alignedtext +grestore +% runremote_lookup_server +gsave +0 0 0.82745 nodecolor +newpath 217.5 313 moveto +60.5 313 lineto +60.5 277 lineto +217.5 277 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 217.5 313 moveto +60.5 313 lineto +60.5 277 lineto +217.5 277 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +68.5 291.3 moveto 141 (runremote_lookup_server) alignedtext +grestore +% client:setup start->runremote_lookup_server +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 207.21 349.81 moveto +195.61 340.66 181.16 329.26 168.56 319.32 curveto +stroke +0 0 0 edgecolor +newpath 170.6 316.47 moveto +160.59 313.03 lineto +166.27 321.97 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 170.6 316.47 moveto +160.59 313.03 lineto +166.27 321.97 lineto +closepath stroke +grestore +% login_attempt +gsave +0 0 0.82745 nodecolor +newpath 324 139 moveto +232 139 lineto +232 103 lineto +324 103 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 324 139 moveto +232 139 lineto +232 103 lineto +324 103 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +240 117.3 moveto 76 (login_attempt) alignedtext +grestore +% runremote_lookup_server->login_attempt +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 190.79 276.8 moveto +200.53 272.04 210.12 266.16 218 259 curveto +250.85 229.16 251.17 213.46 267 172 curveto +269.79 164.69 271.96 156.5 273.61 148.9 curveto +stroke +0 0 0 edgecolor +newpath 277.06 149.52 moveto +275.54 139.03 lineto +270.19 148.17 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 277.06 149.52 moveto +275.54 139.03 lineto +270.19 148.17 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +261 204.3 moveto 63 (have server) alignedtext +grestore +% monitordb_lookup_server +gsave +0 0 0.82745 nodecolor +newpath 194 226 moveto +36 226 lineto +36 190 lineto +194 190 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 194 226 moveto +36 226 lineto +36 190 lineto +194 190 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +44 204.3 moveto 142 (monitordb_lookup_server) alignedtext +grestore +% runremote_lookup_server->monitordb_lookup_server +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 134.14 276.8 moveto +130.83 265.05 126.36 249.24 122.58 235.84 curveto +stroke +0 0 0 edgecolor +newpath 125.94 234.85 moveto +119.85 226.18 lineto +119.2 236.75 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 125.94 234.85 moveto +119.85 226.18 lineto +119.2 236.75 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +129 247.8 moveto 51 (no server) alignedtext +grestore +% rmt:send-receive_start +gsave +0 0 0.82745 nodecolor +newpath 289 52 moveto +151 52 lineto +151 16 lineto +289 16 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 289 52 moveto +151 52 lineto +151 16 lineto +289 16 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +159 30.3 moveto 122 (rmt:send-receive_start) alignedtext +grestore +% login_attempt->rmt:send-receive_start +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 266.26 102.8 moveto +258.01 90.7 246.82 74.3 237.51 60.67 curveto +stroke +0 0 0 edgecolor +newpath 240.25 58.46 moveto +231.72 52.18 lineto +234.47 62.41 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 240.25 58.46 moveto +231.72 52.18 lineto +234.47 62.41 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +253 73.8 moveto 82 (login sucessful) alignedtext +grestore +% clear_runremote +gsave +0 0 0.82745 nodecolor +newpath 151 459 moveto +45 459 lineto +45 423 lineto +151 423 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 151 459 moveto +45 459 lineto +45 423 lineto +151 423 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +53 437.3 moveto 90 (clear_runremote) alignedtext +grestore +% login_attempt->clear_runremote +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 294.97 139.02 moveto +306.47 151.83 320.7 170.47 327 190 curveto +354.2 274.29 351.99 321.78 291 386 curveto +257.73 421.03 203.76 433.95 161.34 438.43 curveto +stroke +0 0 0 edgecolor +newpath 161.01 434.95 moveto +151.38 439.36 lineto +161.66 441.92 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 161.01 434.95 moveto +151.38 439.36 lineto +161.66 441.92 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +343 291.3 moveto 63 (login failed) alignedtext +grestore +% monitordb_lookup_server->login_attempt +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 115.13 189.95 moveto +116.14 179.1 119.27 165.52 128 157 curveto +141.39 143.95 184.91 134.71 221.65 129.01 curveto +stroke +0 0 0 edgecolor +newpath 222.43 132.44 moveto +231.81 127.5 lineto +221.4 125.51 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 222.43 132.44 moveto +231.81 127.5 lineto +221.4 125.51 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +128 160.8 moveto 63 (have server) alignedtext +grestore +% server_start_remote +gsave +0 0 0.82745 nodecolor +newpath 161.5 139 moveto +36.5 139 lineto +36.5 103 lineto +161.5 103 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 161.5 139 moveto +36.5 139 lineto +36.5 103 lineto +161.5 103 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +44.5 117.3 moveto 109 (server_start_remote) alignedtext +grestore +% monitordb_lookup_server->server_start_remote +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 71.66 189.93 moveto +64.6 185.19 58.22 179.29 54 172 curveto +48.62 162.72 52.9 153.65 60.6 145.85 curveto +stroke +0 0 0 edgecolor +newpath 63.17 148.26 moveto +68.45 139.07 lineto +58.59 142.96 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 63.17 148.26 moveto +68.45 139.07 lineto +58.59 142.96 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +54 160.8 moveto 51 (no server) alignedtext +grestore +% delay_2_sec +gsave +0 0 0.82745 nodecolor +newpath 116.5 52 moveto +33.5 52 lineto +33.5 16 lineto +116.5 16 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 116.5 52 moveto +33.5 52 lineto +33.5 16 lineto +116.5 16 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +41.5 30.3 moveto 67 (delay_2_sec) alignedtext +grestore +% server_start_remote->delay_2_sec +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 94.14 102.8 moveto +90.83 91.05 86.36 75.24 82.58 61.84 curveto +stroke +0 0 0 edgecolor +newpath 85.94 60.85 moveto +79.85 52.18 lineto +79.2 62.75 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 85.94 60.85 moveto +79.85 52.18 lineto +79.2 62.75 lineto +closepath stroke +grestore +% delay_2_sec->runremote_lookup_server +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 58.78 52.17 moveto +47.77 65.05 34.14 83.72 28 103 curveto +12.8 150.76 20.08 213.71 27 226 curveto +38.66 246.72 59.47 261.86 80.02 272.52 curveto +stroke +0 0 0 edgecolor +newpath 78.57 275.71 moveto +89.09 276.96 lineto +81.65 269.42 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 78.57 275.71 moveto +89.09 276.96 lineto +81.65 269.42 lineto +closepath stroke +grestore +% rmt:send-receive_start->runremote_lookup_server +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 218.57 52.19 moveto +214.71 98.36 204.3 221.87 203 226 curveto +197.91 242.14 195.11 246.23 184 259 curveto +180.6 262.91 176.7 266.69 172.65 270.22 curveto +stroke +0 0 0 edgecolor +newpath 170.29 267.64 moveto +164.79 276.69 lineto +174.73 273.05 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 170.29 267.64 moveto +164.79 276.69 lineto +174.73 273.05 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +210 160.8 moveto 53 (exception) alignedtext +grestore +% rmt:send-receive_start->rmt:send-receive_start +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 289.27 41.18 moveto +299.83 40.09 307 37.69 307 34 curveto +307 31.64 304.06 29.8 299.17 28.5 curveto +stroke +0 0 0 edgecolor +newpath 299.72 25.05 moveto +289.27 26.82 lineto +298.55 31.95 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 299.72 25.05 moveto +289.27 26.82 lineto +298.55 31.95 lineto +closepath stroke +grestore +% remove_running > 5s +gsave +0 0 0.82745 nodecolor +newpath 158 386 moveto +24 386 lineto +24 350 lineto +158 350 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 158 386 moveto +24 386 lineto +24 350 lineto +158 350 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +32 364.3 moveto 118 (remove_running > 5s) alignedtext +grestore +% clear_runremote->remove_running > 5s +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 96.31 422.81 moveto +95.51 414.79 94.55 405.05 93.67 396.07 curveto +stroke +0 0 0 edgecolor +newpath 97.14 395.64 moveto +92.68 386.03 lineto +90.18 396.32 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 97.14 395.64 moveto +92.68 386.03 lineto +90.18 396.32 lineto +closepath stroke +grestore +% remove_running > 5s->runremote_lookup_server +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 102.62 349.81 moveto +108.4 341.27 115.49 330.77 121.88 321.32 curveto +stroke +0 0 0 edgecolor +newpath 124.79 323.27 moveto +127.49 313.03 lineto +118.99 319.35 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 124.79 323.27 moveto +127.49 313.03 lineto +118.99 319.35 lineto +closepath stroke +grestore +endpage +showpage +grestore +%%PageTrailer +%%EndPage: 1 +%%Trailer +%%Pages: 1 +%%BoundingBox: 36 36 450 550 +end +restore +%%EOF ADDED docs/manual/server.ps Index: docs/manual/server.ps ================================================================== --- /dev/null +++ docs/manual/server.ps @@ -0,0 +1,838 @@ +%!PS-Adobe-3.0 +%%Creator: graphviz version 2.38.0 (20140413.2041) +%%Title: G +%%Pages: (atend) +%%BoundingBox: (atend) +%%EndComments +save +%%BeginProlog +/DotDict 200 dict def +DotDict begin + +/setupLatin1 { +mark +/EncodingVector 256 array def + EncodingVector 0 + +ISOLatin1Encoding 0 255 getinterval putinterval +EncodingVector 45 /hyphen put + +% Set up ISO Latin 1 character encoding +/starnetISO { + dup dup findfont dup length dict begin + { 1 index /FID ne { def }{ pop pop } ifelse + } forall + /Encoding EncodingVector def + currentdict end definefont +} def +/Times-Roman starnetISO def +/Times-Italic starnetISO def +/Times-Bold starnetISO def +/Times-BoldItalic starnetISO def +/Helvetica starnetISO def +/Helvetica-Oblique starnetISO def +/Helvetica-Bold starnetISO def +/Helvetica-BoldOblique starnetISO def +/Courier starnetISO def +/Courier-Oblique starnetISO def +/Courier-Bold starnetISO def +/Courier-BoldOblique starnetISO def +cleartomark +} bind def + +%%BeginResource: procset graphviz 0 0 +/coord-font-family /Times-Roman def +/default-font-family /Times-Roman def +/coordfont coord-font-family findfont 8 scalefont def + +/InvScaleFactor 1.0 def +/set_scale { + dup 1 exch div /InvScaleFactor exch def + scale +} bind def + +% styles +/solid { [] 0 setdash } bind def +/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def +/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def +/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def +/bold { 2 setlinewidth } bind def +/filled { } bind def +/unfilled { } bind def +/rounded { } bind def +/diagonals { } bind def +/tapered { } bind def + +% hooks for setting color +/nodecolor { sethsbcolor } bind def +/edgecolor { sethsbcolor } bind def +/graphcolor { sethsbcolor } bind def +/nopcolor {pop pop pop} bind def + +/beginpage { % i j npages + /npages exch def + /j exch def + /i exch def + /str 10 string def + npages 1 gt { + gsave + coordfont setfont + 0 0 moveto + (\() show i str cvs show (,) show j str cvs show (\)) show + grestore + } if +} bind def + +/set_font { + findfont exch + scalefont setfont +} def + +% draw text fitted to its expected width +/alignedtext { % width text + /text exch def + /width exch def + gsave + width 0 gt { + [] 0 setdash + text stringwidth pop width exch sub text length div 0 text ashow + } if + grestore +} def + +/boxprim { % xcorner ycorner xsize ysize + 4 2 roll + moveto + 2 copy + exch 0 rlineto + 0 exch rlineto + pop neg 0 rlineto + closepath +} bind def + +/ellipse_path { + /ry exch def + /rx exch def + /y exch def + /x exch def + matrix currentmatrix + newpath + x y translate + rx ry scale + 0 0 1 0 360 arc + setmatrix +} bind def + +/endpage { showpage } bind def +/showpage { } def + +/layercolorseq + [ % layer color sequence - darkest to lightest + [0 0 0] + [.2 .8 .8] + [.4 .8 .8] + [.6 .8 .8] + [.8 .8 .8] + ] +def + +/layerlen layercolorseq length def + +/setlayer {/maxlayer exch def /curlayer exch def + layercolorseq curlayer 1 sub layerlen mod get + aload pop sethsbcolor + /nodecolor {nopcolor} def + /edgecolor {nopcolor} def + /graphcolor {nopcolor} def +} bind def + +/onlayer { curlayer ne {invis} if } def + +/onlayers { + /myupper exch def + /mylower exch def + curlayer mylower lt + curlayer myupper gt + or + {invis} if +} def + +/curlayer 0 def + +%%EndResource +%%EndProlog +%%BeginSetup +14 default-font-family set_font +1 setmiterlimit +% /arrowlength 10 def +% /arrowwidth 5 def + +% make sure pdfmark is harmless for PS-interpreters other than Distiller +/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse +% make '<<' and '>>' safe on PS Level 1 devices +/languagelevel where {pop languagelevel}{1} ifelse +2 lt { + userdict (<<) cvn ([) cvn load put + userdict (>>) cvn ([) cvn load put +} if + +%%EndSetup +setupLatin1 +%%Page: 1 1 +%%PageBoundingBox: 36 36 502 935 +%%PageOrientation: Portrait +0 0 1 beginpage +gsave +36 36 466 899 boxprim clip newpath +1 1 set_scale 0 rotate 40 40 translate +% cluster_1 +gsave +1 setlinewidth +0 0.7451 0.64706 graphcolor +newpath 8 8 moveto +8 883 lineto +450 883 lineto +450 8 lineto +closepath stroke +0 0 0 graphcolor +14 /Times-Roman set_font +192.5 867.8 moveto 73 (server:launch) alignedtext +grestore +% cluster_2 +gsave +1 setlinewidth +0 0 0 graphcolor +newpath 16 81 moveto +16 467 lineto +328 467 lineto +328 81 lineto +closepath stroke +grestore +% check_available_queue +gsave +0 0 0.82745 nodecolor +newpath 383.5 852 moveto +240.5 852 lineto +240.5 816 lineto +383.5 816 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 383.5 852 moveto +240.5 852 lineto +240.5 816 lineto +383.5 816 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +248.5 830.3 moveto 127 (check_available_queue) alignedtext +grestore +% remove_entries_over_10s_old +gsave +0 0 0.82745 nodecolor +newpath 402.5 779 moveto +221.5 779 lineto +221.5 743 lineto +402.5 743 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 402.5 779 moveto +221.5 779 lineto +221.5 743 lineto +402.5 743 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +229.5 757.3 moveto 165 (remove_entries_over_10s_old) alignedtext +grestore +% check_available_queue->remove_entries_over_10s_old +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 312 815.81 moveto +312 807.79 312 798.05 312 789.07 curveto +stroke +0 0 0 edgecolor +newpath 315.5 789.03 moveto +312 779.03 lineto +308.5 789.03 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 315.5 789.03 moveto +312 779.03 lineto +308.5 789.03 lineto +closepath stroke +grestore +% set_available +gsave +0 0 0.82745 nodecolor +newpath 315.5 692 moveto +228.5 692 lineto +228.5 656 lineto +315.5 656 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 315.5 692 moveto +228.5 692 lineto +228.5 656 lineto +315.5 656 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +236.5 670.3 moveto 71 (set_available) alignedtext +grestore +% remove_entries_over_10s_old->set_available +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 288.59 742.95 moveto +283.2 737.81 278.14 731.73 275 725 curveto +271.8 718.14 270.43 710.17 270.01 702.64 curveto +stroke +0 0 0 edgecolor +newpath 273.51 702.42 moveto +269.97 692.43 lineto +266.51 702.45 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 273.51 702.42 moveto +269.97 692.43 lineto +266.51 702.45 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +275 713.8 moveto 81 (num_avail < 3) alignedtext +grestore +% exit +gsave +0 0 0.82745 nodecolor +newpath 381 52 moveto +327 52 lineto +327 16 lineto +381 16 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 381 52 moveto +327 52 lineto +327 16 lineto +381 16 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +343.5 30.3 moveto 21 (exit) alignedtext +grestore +% remove_entries_over_10s_old->exit +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 340.49 742.84 moveto +346.56 737.84 352.26 731.86 356 725 curveto +366.69 705.39 361 697.33 361 675 curveto +361 675 361 675 361 440 curveto +361 344.75 434 333.75 434 238.5 curveto +434 238.5 434 238.5 434 106 curveto +434 80.65 411.26 62.15 390.02 50.4 curveto +stroke +0 0 0 edgecolor +newpath 391.57 47.26 moveto +381.08 45.78 lineto +388.36 53.48 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 391.57 47.26 moveto +381.08 45.78 lineto +388.36 53.48 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +368 393.8 moveto 81 (num_avail > 2) alignedtext +grestore +% delay_2s +gsave +0 0 0.82745 nodecolor +newpath 304.5 619 moveto +239.5 619 lineto +239.5 583 lineto +304.5 583 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 304.5 619 moveto +239.5 619 lineto +239.5 583 lineto +304.5 583 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +247.5 597.3 moveto 49 (delay_2s) alignedtext +grestore +% set_available->delay_2s +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 272 655.81 moveto +272 647.79 272 638.05 272 629.07 curveto +stroke +0 0 0 edgecolor +newpath 275.5 629.03 moveto +272 619.03 lineto +268.5 629.03 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 275.5 629.03 moveto +272 619.03 lineto +268.5 629.03 lineto +closepath stroke +grestore +% check_place_in_queue +gsave +0 0 0.82745 nodecolor +newpath 342 546 moveto +202 546 lineto +202 510 lineto +342 510 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 342 546 moveto +202 546 lineto +202 510 lineto +342 510 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +210 524.3 moveto 124 (check_place_in_queue) alignedtext +grestore +% delay_2s->check_place_in_queue +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 272 582.81 moveto +272 574.79 272 565.05 272 556.07 curveto +stroke +0 0 0 edgecolor +newpath 275.5 556.03 moveto +272 546.03 lineto +268.5 556.03 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 275.5 556.03 moveto +272 546.03 lineto +268.5 556.03 lineto +closepath stroke +grestore +% check_place_in_queue->exit +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 295.97 509.79 moveto +308.96 499.05 323.96 484.05 332 467 curveto +337.44 455.47 354 251.25 354 238.5 curveto +354 238.5 354 238.5 354 106 curveto +354 91.65 354 75.67 354 62.51 curveto +stroke +0 0 0 edgecolor +newpath 357.5 62.22 moveto +354 52.22 lineto +350.5 62.22 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 357.5 62.22 moveto +354 52.22 lineto +350.5 62.22 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +353 277.3 moveto 61 (not at head) alignedtext +grestore +% http:transport-launch +gsave +0 0 0.82745 nodecolor +newpath 302 459 moveto +172 459 lineto +172 423 lineto +302 423 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 302 459 moveto +172 459 lineto +172 423 lineto +302 423 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +180 437.3 moveto 114 (http:transport-launch) alignedtext +grestore +% check_place_in_queue->http:transport-launch +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 264.92 509.8 moveto +260.03 497.93 253.44 481.93 247.89 468.45 curveto +stroke +0 0 0 edgecolor +newpath 251.12 467.09 moveto +244.07 459.18 lineto +244.64 469.75 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 251.12 467.09 moveto +244.07 459.18 lineto +244.64 469.75 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +257 480.8 moveto 40 (at head) alignedtext +grestore +% http:transport-run +gsave +0 0 0.82745 nodecolor +newpath 136 372 moveto +24 372 lineto +24 336 lineto +136 336 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 136 372 moveto +24 372 lineto +24 336 lineto +136 336 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +32 350.3 moveto 96 (http:transport-run) alignedtext +grestore +% http:transport-launch->http:transport-run +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 205.23 422.8 moveto +180.78 409.56 146.83 391.18 120.45 376.9 curveto +stroke +0 0 0 edgecolor +newpath 121.92 373.71 moveto +111.46 372.03 lineto +118.58 379.87 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 121.92 373.71 moveto +111.46 372.03 lineto +118.58 379.87 lineto +closepath stroke +grestore +% http:transport-keep-running +gsave +0 0 0.82745 nodecolor +newpath 320 372 moveto +154 372 lineto +154 336 lineto +320 336 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 320 372 moveto +154 372 lineto +154 336 lineto +320 336 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +162 350.3 moveto 150 (http:transport-keep-running) alignedtext +grestore +% http:transport-launch->http:transport-keep-running +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 237 422.8 moveto +237 411.16 237 395.55 237 382.24 curveto +stroke +0 0 0 edgecolor +newpath 240.5 382.18 moveto +237 372.18 lineto +233.5 382.18 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 240.5 382.18 moveto +237 372.18 lineto +233.5 382.18 lineto +closepath stroke +grestore +% client:login +gsave +0 0 0.82745 nodecolor +newpath 268 212 moveto +190 212 lineto +190 176 lineto +268 176 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 268 212 moveto +190 212 lineto +190 176 lineto +268 176 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +198 190.3 moveto 62 (client:login) alignedtext +grestore +% server:shutdown +gsave +0 0 0.82745 nodecolor +newpath 232 125 moveto +126 125 lineto +126 89 lineto +232 89 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 232 125 moveto +126 125 lineto +126 89 lineto +232 89 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +134 103.3 moveto 90 (server:shutdown) alignedtext +grestore +% client:login->server:shutdown +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 195.89 175.85 moveto +189.53 170.97 183.71 165.03 180 158 curveto +176.38 151.14 175.17 142.97 175.12 135.25 curveto +stroke +0 0 0 edgecolor +newpath 178.62 135.33 moveto +175.66 125.16 lineto +171.63 134.96 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 178.62 135.33 moveto +175.66 125.16 lineto +171.63 134.96 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +180 146.8 moveto 63 (login failed) alignedtext +grestore +% delay_5s +gsave +0 0 0.82745 nodecolor +newpath 317.5 125 moveto +252.5 125 lineto +252.5 89 lineto +317.5 89 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 317.5 125 moveto +252.5 125 lineto +252.5 89 lineto +317.5 89 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +260.5 103.3 moveto 49 (delay_5s) alignedtext +grestore +% client:login->delay_5s +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 240.33 175.8 moveto +248.3 163.7 259.11 147.3 268.09 133.67 curveto +stroke +0 0 0 edgecolor +newpath 271.11 135.45 moveto +273.68 125.18 lineto +265.26 131.6 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 271.11 135.45 moveto +273.68 125.18 lineto +265.26 131.6 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +261 146.8 moveto 46 (login ok) alignedtext +grestore +% server:shutdown->exit +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 220.92 88.99 moveto +250.31 77.07 289.23 61.28 317.39 49.85 curveto +stroke +0 0 0 edgecolor +newpath 318.91 53.01 moveto +326.86 46.01 lineto +316.28 46.53 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 318.91 53.01 moveto +326.86 46.01 lineto +316.28 46.53 lineto +closepath stroke +grestore +% tests running? +gsave +0 0 0.82745 nodecolor +newpath 275 299 moveto +183 299 lineto +183 263 lineto +275 263 lineto +closepath fill +1 setlinewidth +filled +0 0 0 nodecolor +newpath 275 299 moveto +183 299 lineto +183 263 lineto +275 263 lineto +closepath stroke +0 0 0 nodecolor +14 /Times-Roman set_font +191 277.3 moveto 76 (tests running?) alignedtext +grestore +% http:transport-keep-running->tests running? +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 235.06 335.81 moveto +234.16 327.79 233.06 318.05 232.05 309.07 curveto +stroke +0 0 0 edgecolor +newpath 235.52 308.57 moveto +230.92 299.03 lineto +228.56 309.36 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 235.52 308.57 moveto +230.92 299.03 lineto +228.56 309.36 lineto +closepath stroke +grestore +% tests running?->client:login +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 229 262.8 moveto +229 251.16 229 235.55 229 222.24 curveto +stroke +0 0 0 edgecolor +newpath 232.5 222.18 moveto +229 212.18 lineto +225.5 222.18 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 232.5 222.18 moveto +229 212.18 lineto +225.5 222.18 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +229 233.8 moveto 19 (yes) alignedtext +grestore +% tests running?->server:shutdown +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 206.39 262.81 moveto +192.11 250.45 174.91 232.41 167 212 curveto +157.35 187.09 162.73 156.51 169.07 134.98 curveto +stroke +0 0 0 edgecolor +newpath 172.49 135.79 moveto +172.2 125.2 lineto +165.82 133.66 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 172.49 135.79 moveto +172.2 125.2 lineto +165.82 133.66 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +167 190.3 moveto 14 (no) alignedtext +grestore +% delay_5s->http:transport-keep-running +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 301.19 125.27 moveto +305.18 130.56 308.91 136.68 311 143 curveto +333.1 209.8 314.78 235.72 284 299 curveto +278.84 309.61 271.04 319.78 263.27 328.37 curveto +stroke +0 0 0 edgecolor +newpath 260.58 326.13 moveto +256.22 335.79 lineto +265.65 330.95 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 260.58 326.13 moveto +256.22 335.79 lineto +265.65 330.95 lineto +closepath stroke +grestore +endpage +showpage +grestore +%%PageTrailer +%%EndPage: 1 +%%Trailer +%%Pages: 1 +%%BoundingBox: 36 36 502 935 +end +restore +%%EOF Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -12,10 +12,11 @@ ;; launch a task - this runs on the originating host, tests themselves ;; ;;====================================================================== (use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables) +;; pathname-expand) (import (prefix base64 base64:)) (import (prefix sqlite3 sqlite3:)) (declare (unit launch)) @@ -900,18 +901,18 @@ ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (cond ((and launcher hosts) ;; must be using ssh hostname - (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) + (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) (launcher - (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param))) + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) (else (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 "fullcmd: " fullcmd) ADDED launch.scm-baseline Index: launch.scm-baseline ================================================================== --- /dev/null +++ launch.scm-baseline @@ -0,0 +1,969 @@ + +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + +(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables) + +(import (prefix base64 base64:)) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit launch)) +(declare (uses common)) +(declare (uses configf)) +(declare (uses db)) +;; (declare (uses sdb)) +(declare (uses tdb)) +;; (declare (uses filedb)) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode) + (or (eq? exitcode 0) + (and logpro (eq? exitcode 2)))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + + +(define (launch:runstep ezstep run-id test-id exit-info m tal) + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (logpro-used (file-exists? logpro-file))) + ;; NB// can safely assume we are in test-area directory + (debug:print 4 "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparms: " stepparms " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () + (let* ((cmd (conc stepcmd " > " stepname ".log")) + (pid (process-run cmd))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 "step " stepname " completed with exit code " (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let ((pid (process-run (conc "logpro " logpro-file " " (conc stepname ".html") " < " stepname ".log")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 "logpro for step " stepname " exited with code " (vector-ref exit-info 2))))) + + (let ((exinfo (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") ""))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna area-dat)) + (if logpro-used + (rmt:test-set-log! run-id test-id (conc stepname ".html") area-dat)) + ;; set the test final status + (let* ((this-step-status (cond + ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) + ((eq? (vector-ref exit-info 2) 0) 'pass) + (else 'fail))) + (overall-status (cond + ((eq? (vector-ref exit-info 3) 2) 'warn) ;; rollup-status + ((eq? (vector-ref exit-info 3) 0) 'pass) + (else 'fail))) + (next-status (cond + ((eq? overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING"))) + ) + (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (vector-ref exit-info 3)) + (case next-status + ((warn) + (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:execute encoded-cmd area-dat) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd))) + (setenv "MT_CMDINFO" encoded-cmd) + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + (transport (assoc/default 'transport cmdinfo)) + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + (port (assoc/default 'port cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc testpath "/" runscript))) + (if (and (file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + ;; (rollup-status 0) + ) + (change-directory top-path) + + ;; (set-signal-handler! signal/int (lambda () + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat))) + (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (begin + (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys area-dat)) + ;; (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup-for-run area-dat force: #t)) + (begin + (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + (change-directory toppath) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read))) ;; (read-config (conc toppath "/runconfigs.config") #f #t sections: (list "default" target)))) + ;; (setup-env-defaults (conc toppath "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each (lambda (section) + (for-each (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (setenv var (config:eval-string-in-environment val))) ;; val) + (debug:print 0 "ERROR: bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print 0 "ERROR: required variable " var " does not have a valid value. Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (configf:lookup (megatest:area-configdat area-dat) "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) + (set-item-env-vars itemdat) + (save-environment-as-files "megatest") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript (not (file-execute-access? fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (vector #t #t #t 0)) + (job-thread #f) + (keep-going #t) + (runit (lambda () + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + + ;; (thread-sleep! 0.3) + (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING" area-dat) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if ezsteps + (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + (ezstepslst (hash-table-ref/default testconfig "ezsteps" '()))) + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + ;; check exit-info (vector-ref exit-info 1) + (if (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal))) + (if (and (steprun-good? logpro-used (vector-ref exit-info 2)) + (not (null? tal))) + (loop (car tal) (cdr tal) stepname))) + (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep)))))))) + (monitorjob (lambda () + (let* ((start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + (let loop ((minutes (calc-minutes))) + (begin + (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)) + (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) + (time-exceeded (> run-seconds runtlim))) + (if time-exceeded + (begin + (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) + #t) + #f))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f) + (if kill-job? + (begin + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id area-dat)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + #f + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 "not killing process " pid " as it is not alive")))) + pids) + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (args:get-arg "-m") #f)) + (begin + (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (args:get-arg "-m") #f) + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. + (exit))) + (if keep-going + (begin + (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses + (if keep-going + (loop (calc-minutes))))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (set! keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id area-dat))) + ;; Am I completed? + (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" + ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run + ((eq? (vector-ref exit-info 3) 0) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (vector-ref exit-info 3) 1) "FAIL") + ((eq? (vector-ref exit-info 3) 2) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " (vector-ref exit-info 3)) + (tests:test-set-status! run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + )) + ;; for automated creation of the rollup html file this is a good place... + ;; (if (and (not (equal? item-path "")) + ;; (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) + (tests:summarize-items run-id test-id test-name #f) + (tests:summarize-test run-id test-id)) ;; don't force - just update if no + (mutex-unlock! m) + (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " + work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") + (if (not (vector-ref exit-info 1)) + (exit 4))))))) + +(define (launch:read-cached-config) + (if (get-environment-variable "MT_CMDINFO") ;; we are inside a test - do not reprocess configs + (let ((alistconfig (conc (get-environment-variable "MT_LINKTREE") "/" + (get-environment-variable "MT_TARGET") "/" + (get-environment-variable "MT_RUNNAME") "/" + ".megatest.cfg"))) + (if (file-exists? alistconfig) + (list (configf:read-alist alistconfig) + (get-environment-variable "MT_RUN_AREA_HOME")) + #f)) + #f)) + +(define (launch:read-megatest-config toppath) + (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))) + (if runname (setenv "MT_RUNNAME" runname)) + (find-and-read-config + (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + +;; set up the very basics needed for doing anything here. +(define (launch:setup-for-run area-dat #!key (force #f)) + ;; would set values for KEYS in the environment here for better support of env-override but + ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to + ;; pass on that idea for now + ;; special case + (let ((configdat (megatest:area-configdat area-dat))) + (if (or force (not (hash-table? configdat))) ;; no need to re-open on every call + (let* ((newconfiginfo (or (launch:read-cached-config) ;; no config cached - give up + (launch:read-megatest-config (megatest:area-path area-dat)))) + (configdat (car newconfiginfo)) + (toppath (cadr newconfiginfo))) + (megatest:area-configinfo-set! area-dat newconfiginfo) + (megatest:area-configdat-set! area-dat configdat) + (megatest:area-path-set! area-dat toppath) + (let* ((tmptransport (configf:lookup configdat "server" "transport")) + (transport (if tmptransport (string->symbol tmptransport) 'http))) + (if (member transport '(http rpc nmsg)) + (megatest:area-transport-set! area-dat transport) + (begin + (debug:print 0 "ERROR: Unrecognised transport " transport) + (exit)))) + (let ((linktree (configf:lookup configdat "setup" "linktree"))) ;; link tree is critical + (if linktree + (if (not (file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (begin + (debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config") + (exit 1))) + (if linktree + (let ((dbdir (conc linktree "/.db"))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (if (not (directory-exists? dbdir))(create-directory dbdir))) + (setenv "MT_LINKTREE" linktree)) + (begin + (debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section") + (exit 1))) + (if (and toppath + (directory-exists? toppath)) + (setenv "MT_RUN_AREA_HOME" toppath) + (begin + (debug:print 0 "ERROR: failed to find the top path to your Megatest area.") + (exit 1)))) + toppath)))) + +(define (launch:cache-config area-dat) + ;; if we have a linktree and -runtests and -target and the directory exists dump the config + ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg + (let ((configdat (megatest:area-configdat area-dat))) + (if (and configdat + (args:get-arg "-runtests")) + (let* ((linktree (get-environment-variable "MT_LINKTREE")) + (target (common:args-get-target)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname"))) + (fulldir (conc linktree "/" + target "/" + runname))) + (debug:print-info 0 "Have -runtests with target=" target ", runname=" runname ", fulldir=" fulldir) + (if (file-exists? linktree) ;; can't proceed without linktree + (begin + (if (not (file-exists? fulldir)) + (create-directory fulldir #t)) ;; need to protect with exception handler + (if (and target + runname + (file-exists? fulldir)) + (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) + (targfile (conc fulldir "/.megatest.cfg"))) + (debug:print-info 0 "Caching megatest.config in " fulldir "/.megatest.cfg") + (configf:write-alist configdat tmpfile) + (system (conc "ln -sf " tmpfile " " targfile)) + )))))))) + +(define (get-best-disk confdat) + (let* ((disks (hash-table-ref/default confdat "disks" #f)) + (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) + (string->number (or m "10000"))))) + (if disks + (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb + (if res + (cdr res) + (begin + (if (common:low-noise-print 20 "no valid disks") + (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!")) + (exit 1))))))) + +;; Desired directory structure: +;; +;; - - -. +;; | +;; v +;; - - -|- +;; +;; dir stored in test is: +;; +;; - - [ - ] +;; +;; All log file links should be stored relative to the top of link path +;; +;; - [ - ] +;; +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat area-dat #!key (remtries 2)) + (let* ((configdat (megatest:area-configdat area-dat)) + (item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it + (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. + run-info + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + ;; convert back to db: from rdb: - this is always run at server end + (target (string-intersperse (map cadr keyvals) "/")) + + (not-iterated (equal? "" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path "/" testtop-base)) + (test-path (conc disk-path "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (let ((rd (config-lookup configdat "setup" "linktree"))) + (if rd rd (conc (megatest:area-path area-dat) "/runs")))) + + (lnkbase (conc linktree "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) + + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path area-dat) + + (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) + (if (not (file-exists? linktree)) + (begin + (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) + (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) + ;; create the directory for the tests dir links, this is needed no matter what... + (if (and (not (directory-exists? lnkbase)) + (not (file-exists? lnkbase))) + (handle-exceptions + exn + (begin + (debug:print "ERROR: Problem creating linktree base at " lnkbase) + (print-error-message exn (current-error-port))) + (create-directory lnkbase #t))) + + ;; update the toptest record with its location rundir, cache the path + ;; This wass highly inefficient, one db write for every subtest, potentially + ;; thousands of unnecessary updates, cache the fact it was set and don't set it + ;; again. + + ;; Now create the link from the test path to the link tree, however + ;; if the test is iterated it is necessary to create the parent path + ;; to the iteration. use pathname-directory to trim the path by one + ;; level + (if (not not-iterated) ;; i.e. iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (delete-file lnkpath))) + + (if (not (or (file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (file-exists? lnkpath) + (resolve-pathname lnkpath) + lnkpath) + testname "" area-dat) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + #f ;; don't care to catch and deal with errors here for now. + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 "Setting up sub test run area") + (debug:print 2 " - creating run area in " test-path) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory test-path #t)) + (debug:print 2 + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit)) + (if (symbolic-link? lnktarget) (delete-file lnktarget)) + (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + + (if (and test-src-path (directory? test-path)) + (begin + (let* ((ovrcmd (let ((cmd (config-lookup configdat "setup" "testcopycmd"))) + (if cmd + ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH + (string-substitute "TEST_TARG_PATH" test-path + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) + #f))) + (cmd (if ovrcmd + ovrcmd + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) + (status (system cmd))) + (if (not (eq? status 0)) + (debug:print 2 "ERROR: problem with running \"" cmd "\""))) + (list lnkpathf lnkpath )) + (if (and test-src-path (> remtries 0)) + (begin + (debug:print 0 "ERROR: Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) + ;; + (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) + (list #f #f))))) + +;; 1. look though disks list for disk with most space +;; 2. create run dir on disk, path name is meaningful +;; 3. create link from run dir to megatest runs area +;; 4. remotely run the test on allocated host +;; - could be ssh to host from hosts table (update regularly with load) +;; - could be netbatch +;; (launch-test db (cadr status) test-conf)) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params area-dat) + (let ((toppath (megatest:area-path area-dat)) + (configdat (megatest:area-configdat area-dat))) + (change-directory toppath) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (list ;; (list "MT_TEST_RUN_DIR" work-area) + (list "MT_RUN_AREA_HOME" toppath) + (list "MT_TEST_NAME" test-name) + ;; (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + ;; (list "MT_TARGET" mt_target) + )) + (let* ((useshell (let ((ush (config-lookup configdat "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (launcher (config-lookup configdat "jobtools" "launcher")) + (runscript (config-lookup test-conf "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big + (diskspace (config-lookup test-conf "requirements" "diskspace")) + (memory (config-lookup test-conf "requirements" "memory")) + (hosts (config-lookup configdat "jobtools" "workhosts")) + (remote-megatest (config-lookup configdat "setup" "executable")) + (run-time-limit (or (configf:lookup test-conf "requirements" "runtimelim") + (configf:lookup configdat "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (item-path (item-list->path itemdat)) + (test-sig (conc test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + (setenv "MT_ITEMPATH" item-path) + (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:roll-up-pass-fail-counts run-id test-name item-path "LAUNCHED" area-dat) + (set! diskpath (get-best-disk configdat)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + (list 'transport (conc (megatest:area-transport area-dat))) ;; + ;; (list 'serverinf *server-info*) + (list 'toppath toppath) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default configdat "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir + (cond + ((and launcher hosts) ;; must be using ssh hostname + (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 "fullcmd: " fullcmd) + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default configdat "env-override" '()))) + (testprevvals (alist->env-vars + (hash-table-ref/default test-conf "pre-launch-env-overrides" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup configdat "setup" "launchwait") "no") #f #t)) + (launch-results (apply (if launchwait + cmd-run-with-stderr->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 "Launching completed, updating db") + (debug:print 2 "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory toppath)) + ;; added paren below after refactoring above routine. must have missed something? + ) + ADDED launch.scm-merge Index: launch.scm-merge ================================================================== --- /dev/null +++ launch.scm-merge @@ -0,0 +1,980 @@ + +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + +(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables) + +(import (prefix base64 base64:)) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit launch)) +(declare (uses common)) +(declare (uses configf)) +(declare (uses db)) +;; (declare (uses sdb)) +(declare (uses tdb)) +;; (declare (uses filedb)) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode) + (or (eq? exitcode 0) + (and logpro (eq? exitcode 2)))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + + +(define (launch:runstep ezstep run-id test-id exit-info m tal) + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (logpro-used (file-exists? logpro-file))) + ;; NB// can safely assume we are in test-area directory + (debug:print 4 "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparms: " stepparms " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc "exec " stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid (process-run "/bin/bash" (list "-c" cmd)))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 "step " stepname " completed with exit code " (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let ((pid (process-run (conc "logpro " logpro-file " " (conc stepname ".html") " < " stepname ".log")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 "logpro for step " stepname " exited with code " (vector-ref exit-info 2))))) + + (let ((exinfo (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") ""))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna area-dat)) + (if logpro-used + (rmt:test-set-log! run-id test-id (conc stepname ".html") area-dat)) + ;; set the test final status + (let* ((this-step-status (cond + ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) + ((eq? (vector-ref exit-info 2) 0) 'pass) + (else 'fail))) + (overall-status (cond + ((eq? (vector-ref exit-info 3) 2) 'warn) ;; rollup-status + ((eq? (vector-ref exit-info 3) 0) 'pass) + (else 'fail))) + (next-status (cond + ((eq? overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING"))) + ) + (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (vector-ref exit-info 3)) + (case next-status + ((warn) + (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:execute encoded-cmd area-dat) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd))) + (setenv "MT_CMDINFO" encoded-cmd) + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + (transport (assoc/default 'transport cmdinfo)) + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + (port (assoc/default 'port cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc testpath "/" runscript))) + (if (and (file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + ;; (rollup-status 0) + ) + (change-directory top-path) + + ;; (set-signal-handler! signal/int (lambda () + + ;; WAS: Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; NOW: Do not run test test unless state is LAUNCHED + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + ;; This is flawed. It should be a single transaction that tests for NOT_STARTED and updates to REMOTEHOSTSTART + (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat))) + ;; + (if (equal? (db:test-get-state test-info) "LAUNCHED") ;; '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (begin + (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys area-dat)) + ;; (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup-for-run area-dat force: #t)) + (begin + (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + (change-directory toppath) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read))) ;; (read-config (conc toppath "/runconfigs.config") #f #t sections: (list "default" target)))) + ;; (setup-env-defaults (conc toppath "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each (lambda (section) + (for-each (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (setenv var (config:eval-string-in-environment val))) ;; val) + (debug:print 0 "ERROR: bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print 0 "ERROR: required variable " var " does not have a valid value. Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (configf:lookup (megatest:area-configdat area-dat) "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) + (set-item-env-vars itemdat) + (save-environment-as-files "megatest") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript (not (file-execute-access? fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (vector #t #t #t 0)) + (job-thread #f) + (keep-going #t) + (runit (lambda () + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + + ;; (thread-sleep! 0.3) + (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING" area-dat) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if ezsteps + (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + (ezstepslst (hash-table-ref/default testconfig "ezsteps" '()))) + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + ;; check exit-info (vector-ref exit-info 1) + (if (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal))) + (if (and (steprun-good? logpro-used (vector-ref exit-info 2)) + (not (null? tal))) + (loop (car tal) (cdr tal) stepname))) + (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep)))))))) + (monitorjob (lambda () + (let* ((start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + (let loop ((minutes (calc-minutes))) + (begin + (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)) + (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) + (time-exceeded (> run-seconds runtlim))) + (if time-exceeded + (begin + (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) + #t) + #f))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f) + (if kill-job? + (begin + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id area-dat)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + #f + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 "not killing process " pid " as it is not alive")))) + pids) + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (args:get-arg "-m") #f)) + (begin + (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (args:get-arg "-m") #f) + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. + (exit))) + (if keep-going + (begin + (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses + (if keep-going + (loop (calc-minutes))))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (set! keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id area-dat))) + ;; Am I completed? + (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" + ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run + ((eq? (vector-ref exit-info 3) 0) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (vector-ref exit-info 3) 1) "FAIL") + ((eq? (vector-ref exit-info 3) 2) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " (vector-ref exit-info 3)) + (tests:test-set-status! run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + )) + ;; for automated creation of the rollup html file this is a good place... + ;; (if (and (not (equal? item-path "")) + ;; (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) + (tests:summarize-items run-id test-id test-name #f) + (tests:summarize-test run-id test-id)) ;; don't force - just update if no + (mutex-unlock! m) + (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " + work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") + (if (not (vector-ref exit-info 1)) + (exit 4))))))) + +(define (launch:read-cached-config) + (if (get-environment-variable "MT_CMDINFO") ;; we are inside a test - do not reprocess configs + (let ((alistconfig (conc (get-environment-variable "MT_LINKTREE") "/" + (get-environment-variable "MT_TARGET") "/" + (get-environment-variable "MT_RUNNAME") "/" + ".megatest.cfg"))) + (if (file-exists? alistconfig) + (list (configf:read-alist alistconfig) + (get-environment-variable "MT_RUN_AREA_HOME")) + #f)) + #f)) + +(define (launch:read-megatest-config toppath) + (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))) + (if runname (setenv "MT_RUNNAME" runname)) + (find-and-read-config + (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + +;; set up the very basics needed for doing anything here. +(define (launch:setup-for-run area-dat #!key (force #f)) + ;; would set values for KEYS in the environment here for better support of env-override but + ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to + ;; pass on that idea for now + ;; special case + (let ((configdat (megatest:area-configdat area-dat))) + (if (or force (not (hash-table? configdat))) ;; no need to re-open on every call + (let* ((newconfiginfo (or (launch:read-cached-config) ;; no config cached - give up + (launch:read-megatest-config (megatest:area-path area-dat)))) + (configdat (car newconfiginfo)) + (toppath (cadr newconfiginfo))) + (megatest:area-configinfo-set! area-dat newconfiginfo) + (megatest:area-configdat-set! area-dat configdat) + (megatest:area-path-set! area-dat toppath) + (let* ((tmptransport (configf:lookup configdat "server" "transport")) + (transport (if tmptransport (string->symbol tmptransport) 'http))) + (if (member transport '(http rpc nmsg)) + (megatest:area-transport-set! area-dat transport) + (begin + (debug:print 0 "ERROR: Unrecognised transport " transport) + (exit)))) + (let ((linktree (configf:lookup configdat "setup" "linktree"))) ;; link tree is critical + (if linktree + (if (not (file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (begin + (debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config") + (exit 1))) + (if linktree + (let ((dbdir (conc linktree "/.db"))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (if (not (directory-exists? dbdir))(create-directory dbdir))) + (setenv "MT_LINKTREE" linktree)) + (begin + (debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section") + (exit 1))) + (if (and toppath + (directory-exists? toppath)) + (setenv "MT_RUN_AREA_HOME" toppath) + (begin + (debug:print 0 "ERROR: failed to find the top path to your Megatest area.") + (exit 1)))) + toppath)))) + +(define (launch:cache-config area-dat) + ;; if we have a linktree and -runtests and -target and the directory exists dump the config + ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg + (let ((configdat (megatest:area-configdat area-dat))) + (if (and configdat + (args:get-arg "-runtests")) + (let* ((linktree (get-environment-variable "MT_LINKTREE")) + (target (common:args-get-target)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname"))) + (fulldir (conc linktree "/" + target "/" + runname))) + (debug:print-info 0 "Have -runtests with target=" target ", runname=" runname ", fulldir=" fulldir) + (if (file-exists? linktree) ;; can't proceed without linktree + (begin + (if (not (file-exists? fulldir)) + (create-directory fulldir #t)) ;; need to protect with exception handler + (if (and target + runname + (file-exists? fulldir)) + (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) + (targfile (conc fulldir "/.megatest.cfg"))) + (debug:print-info 0 "Caching megatest.config in " fulldir "/.megatest.cfg") + (configf:write-alist configdat tmpfile) + (system (conc "ln -sf " tmpfile " " targfile)) + )))))))) + +(define (get-best-disk confdat) + (let* ((disks (hash-table-ref/default confdat "disks" #f)) + (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) + (string->number (or m "10000"))))) + (if disks + (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb + (if res + (cdr res) + (begin + (if (common:low-noise-print 20 "no valid disks") + (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!")) + (exit 1))))))) + +;; Desired directory structure: +;; +;; - - -. +;; | +;; v +;; - - -|- +;; +;; dir stored in test is: +;; +;; - - [ - ] +;; +;; All log file links should be stored relative to the top of link path +;; +;; - [ - ] +;; +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat area-dat #!key (remtries 2)) + (let* ((configdat (megatest:area-configdat area-dat)) + (item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it + (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. + run-info + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + ;; convert back to db: from rdb: - this is always run at server end + (target (string-intersperse (map cadr keyvals) "/")) + + (not-iterated (equal? "" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path "/" testtop-base)) + (test-path (conc disk-path "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (let ((rd (config-lookup configdat "setup" "linktree"))) + (if rd rd (conc (megatest:area-path area-dat) "/runs")))) + + (lnkbase (conc linktree "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) + + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path area-dat) + + (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) + (if (not (file-exists? linktree)) + (begin + (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) + (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) + ;; create the directory for the tests dir links, this is needed no matter what... + (if (and (not (directory-exists? lnkbase)) + (not (file-exists? lnkbase))) + (handle-exceptions + exn + (begin + (debug:print "ERROR: Problem creating linktree base at " lnkbase) + (print-error-message exn (current-error-port))) + (create-directory lnkbase #t))) + + ;; update the toptest record with its location rundir, cache the path + ;; This wass highly inefficient, one db write for every subtest, potentially + ;; thousands of unnecessary updates, cache the fact it was set and don't set it + ;; again. + + ;; Now create the link from the test path to the link tree, however + ;; if the test is iterated it is necessary to create the parent path + ;; to the iteration. use pathname-directory to trim the path by one + ;; level + (if (not not-iterated) ;; i.e. iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (delete-file lnkpath))) + + (if (not (or (file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (file-exists? lnkpath) + (resolve-pathname lnkpath) + lnkpath) + testname "" area-dat) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + #f ;; don't care to catch and deal with errors here for now. + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 "Setting up sub test run area") + (debug:print 2 " - creating run area in " test-path) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory test-path #t)) + (debug:print 2 + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit)) + (if (symbolic-link? lnktarget) (delete-file lnktarget)) + (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + + (if (and test-src-path (directory? test-path)) + (begin + (let* ((ovrcmd (let ((cmd (config-lookup configdat "setup" "testcopycmd"))) + (if cmd + ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH + (string-substitute "TEST_TARG_PATH" test-path + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) + #f))) + (cmd (if ovrcmd + ovrcmd + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) + (status (system cmd))) + (if (not (eq? status 0)) + (debug:print 2 "ERROR: problem with running \"" cmd "\""))) + (list lnkpathf lnkpath )) + (if (and test-src-path (> remtries 0)) + (begin + (debug:print 0 "ERROR: Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) + ;; + (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) + (list #f #f))))) + +;; 1. look though disks list for disk with most space +;; 2. create run dir on disk, path name is meaningful +;; 3. create link from run dir to megatest runs area +;; 4. remotely run the test on allocated host +;; - could be ssh to host from hosts table (update regularly with load) +;; - could be netbatch +;; (launch-test db (cadr status) test-conf)) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params area-dat) + (let ((toppath (megatest:area-path area-dat)) + (configdat (megatest:area-configdat area-dat))) + (change-directory toppath) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (list ;; (list "MT_TEST_RUN_DIR" work-area) + (list "MT_RUN_AREA_HOME" toppath) + (list "MT_TEST_NAME" test-name) + ;; (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + ;; (list "MT_TARGET" mt_target) + )) + (let* ((useshell (let ((ush (config-lookup configdat "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (launcher (config-lookup configdat "jobtools" "launcher")) + (runscript (config-lookup test-conf "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big + (diskspace (config-lookup test-conf "requirements" "diskspace")) + (memory (config-lookup test-conf "requirements" "memory")) + (hosts (config-lookup configdat "jobtools" "workhosts")) + (remote-megatest (config-lookup configdat "setup" "executable")) + (run-time-limit (or (configf:lookup test-conf "requirements" "runtimelim") + (configf:lookup configdat "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (item-path (item-list->path itemdat)) + (test-sig (conc test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + (setenv "MT_ITEMPATH" item-path) + (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:roll-up-pass-fail-counts run-id test-name item-path "LAUNCHED" area-dat) + (set! diskpath (get-best-disk configdat)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + (list 'transport (conc (megatest:area-transport area-dat))) ;; + ;; (list 'serverinf *server-info*) + (list 'toppath toppath) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default configdat "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + + ;; Moving launch logs to MT_RUN_AREA_HOME/logs + ;; + (let ((launchdir (configf:lookup *configdat* "setup" "launchdir"))) ;; (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir + (if (not launchdir) ;; default + (change-directory (conc *toppath* "/logs")) ;; can assume this exists + (case (string->symbol launchdir) + ((legacy)(change-directory work-area)) + (else (change-directory launchdir))))) + (cond + ((and launcher hosts) ;; must be using ssh hostname + (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 "fullcmd: " fullcmd) + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default configdat "env-override" '()))) + (testprevvals (alist->env-vars + (hash-table-ref/default test-conf "pre-launch-env-overrides" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup configdat "setup" "launchwait") "no") #f #t)) + (launch-results (apply (if launchwait + cmd-run-with-stderr->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> " work-area "/mt_launch.log 2>&1"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file (conc work-area "/mt_launch.log") + (lambda () + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 "Launching completed, updating db") + (debug:print 2 "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory toppath)) + ;; added paren below after refactoring above routine. must have missed something? + ) + ADDED launch.scm-original Index: launch.scm-original ================================================================== --- /dev/null +++ launch.scm-original @@ -0,0 +1,969 @@ + +;; Copyright 2006-2013, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;;====================================================================== +;; launch a task - this runs on the originating host, tests themselves +;; +;;====================================================================== + +(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables) + +(import (prefix base64 base64:)) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit launch)) +(declare (uses common)) +(declare (uses configf)) +(declare (uses db)) +;; (declare (uses sdb)) +(declare (uses tdb)) +;; (declare (uses filedb)) + +(include "common_records.scm") +(include "key_records.scm") +(include "db_records.scm") + +;;====================================================================== +;; ezsteps +;;====================================================================== + +;; ezsteps were going to be coded as +;; stepname[,predstep1,predstep2 ...] [{VAR1=first,second,third}] command to execute +;; BUT +;; now are +;; stepname {VAR=first,second,third ...} command ... +;; where the {VAR=first,second,third ...} is optional. + +;; given an exit code and whether or not logpro was used calculate OK/BAD +;; return #t if we are ok, #f otherwise +(define (steprun-good? logpro exitcode) + (or (eq? exitcode 0) + (and logpro (eq? exitcode 2)))) + +;; if handed a string, process it, else look for MT_CMDINFO +(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f)) + (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO")))) + (if enccmd + (common:read-encoded-string enccmd) + '()))) + + +(define (launch:runstep ezstep run-id test-id exit-info m tal) + (let* ((stepname (car ezstep)) ;; do stuff to run the step + (stepinfo (cadr ezstep)) + (stepparts (string-match (regexp "^(\\{([^\\}]*)\\}\\s*|)(.*)$") stepinfo)) + (stepparms (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each + (stepcmd (list-ref stepparts 3)) + (script "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\ + (logpro-file (conc stepname ".logpro")) + (html-file (conc stepname ".html")) + (logpro-used (file-exists? logpro-file))) + ;; NB// can safely assume we are in test-area directory + (debug:print 4 "ezsteps:\n stepname: " stepname " stepinfo: " stepinfo " stepparts: " stepparts + " stepparms: " stepparms " stepcmd: " stepcmd) + + ;; ;; first source the previous environment + ;; (let ((prev-env (conc ".ezsteps/" prevstep (if (string-search (regexp "csh") + ;; (get-environment-variable "SHELL")) ".csh" ".sh")))) + ;; (if (and prevstep (file-exists? prev-env)) + ;; (set! script (conc script "source " prev-env)))) + + ;; call the command using mt_ezstep + ;; (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "x") " " stepcmd)) + + (debug:print 4 "script: " script) + (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f area-dat) + ;; now launch the actual process + (call-with-environment-variables + (list (cons "PATH" (conc (get-environment-variable "PATH") ":."))) + (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1") + (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 + (pid (process-run "/bin/bash" (list "-c" cmd)))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1)))) + ))))) + (debug:print-info 0 "step " stepname " completed with exit code " (vector-ref exit-info 2)) + ;; now run logpro if needed + (if logpro-used + (let ((pid (process-run (conc "logpro " logpro-file " " (conc stepname ".html") " < " stepname ".log")))) + (let processloop ((i 0)) + (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (processloop (+ i 1))))) + (debug:print-info 0 "logpro for step " stepname " exited with code " (vector-ref exit-info 2))))) + + (let ((exinfo (vector-ref exit-info 2)) + (logfna (if logpro-used (conc stepname ".html") ""))) + (rmt:teststep-set-status! run-id test-id stepname "end" exinfo #f logfna area-dat)) + (if logpro-used + (rmt:test-set-log! run-id test-id (conc stepname ".html") area-dat)) + ;; set the test final status + (let* ((this-step-status (cond + ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) + ((eq? (vector-ref exit-info 2) 0) 'pass) + (else 'fail))) + (overall-status (cond + ((eq? (vector-ref exit-info 3) 2) 'warn) ;; rollup-status + ((eq? (vector-ref exit-info 3) 0) 'pass) + (else 'fail))) + (next-status (cond + ((eq? overall-status 'pass) this-step-status) + ((eq? overall-status 'warn) + (if (eq? this-step-status 'fail) 'fail 'warn)) + (else 'fail))) + (next-state ;; "RUNNING") ;; WHY WAS THIS CHANGED TO NOT USE (null? tal) ?? + (cond + ((null? tal) ;; more to run? + "COMPLETED") + (else "RUNNING"))) + ) + (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used + " this-step-status: " this-step-status " overall-status: " overall-status + " next-status: " next-status " rollup-status: " (vector-ref exit-info 3)) + (case next-status + ((warn) + (vector-set! exit-info 3 2) ;; rollup-status + ;; NB// test-set-status! does rdb calls under the hood + (tests:test-set-status! run-id test-id next-state "WARN" + (if (eq? this-step-status 'warn) "Logpro warning found" #f) + #f)) + ((pass) + (tests:test-set-status! run-id test-id next-state "PASS" #f #f)) + (else ;; 'fail + (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" + (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f) + ))) + logpro-used)) + +(define (launch:execute encoded-cmd area-dat) + (let* ((cmdinfo (common:read-encoded-string encoded-cmd))) + (setenv "MT_CMDINFO" encoded-cmd) + (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) + ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area + (top-path (assoc/default 'toppath cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area + (test-name (assoc/default 'test-name cmdinfo)) + (runscript (assoc/default 'runscript cmdinfo)) + (ezsteps (assoc/default 'ezsteps cmdinfo)) + ;; (runremote (assoc/default 'runremote cmdinfo)) + (transport (assoc/default 'transport cmdinfo)) + ;; (serverinf (assoc/default 'serverinf cmdinfo)) + (port (assoc/default 'port cmdinfo)) + (run-id (assoc/default 'run-id cmdinfo)) + (test-id (assoc/default 'test-id cmdinfo)) + (target (assoc/default 'target cmdinfo)) + (itemdat (assoc/default 'itemdat cmdinfo)) + (env-ovrd (assoc/default 'env-ovrd cmdinfo)) + (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar + (runname (assoc/default 'runname cmdinfo)) + (megatest (assoc/default 'megatest cmdinfo)) + (runtlim (assoc/default 'runtlim cmdinfo)) + (item-path (item-list->path itemdat)) + (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) + (keys #f) + (keyvals #f) + (fullrunscript (if (not runscript) + #f + (if (substring-index "/" runscript) + runscript ;; use unadultered if contains slashes + (let ((fulln (conc testpath "/" runscript))) + (if (and (file-exists? fulln) + (file-execute-access? fulln)) + fulln + runscript))))) ;; assume it is on the path + ;; (rollup-status 0) + ) + (change-directory top-path) + + ;; (set-signal-handler! signal/int (lambda () + + ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, + ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* + ;; + (let ((test-info (rmt:get-testinfo-state-status run-id test-id area-dat))) + (if (not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))) + (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") + (begin + (debug:print 0 "ERROR: test state is " (db:test-get-state test-info) ", cannot proceed") + (exit)))) + + (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name)) + (set! keys (rmt:get-keys area-dat)) + ;; (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process + ;; one of these is defunct/redundant ... + (if (not (launch:setup-for-run area-dat force: #t)) + (begin + (debug:print 0 "Failed to setup, exiting") + ;; (sqlite3:finalize! db) + ;; (sqlite3:finalize! tdb) + (exit 1))) + (change-directory toppath) + + ;; NOTE: Current order is to process runconfigs *before* setting the MT_ vars. This + ;; seems non-ideal but could well break stuff + ;; BUG? BUG? BUG? + + (let ((rconfig (full-runconfigs-read))) ;; (read-config (conc toppath "/runconfigs.config") #f #t sections: (list "default" target)))) + ;; (setup-env-defaults (conc toppath "/runconfigs.config") run-id (make-hash-table) keyvals target) + ;; (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) + ;; Now have runconfigs data loaded, set environment vars + (for-each (lambda (section) + (for-each (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if (and (string? var)(string? val)) + (begin + (setenv var (config:eval-string-in-environment val))) ;; val) + (debug:print 0 "ERROR: bad variable spec, " var "=" val)))) + (configf:get-section rconfig section))) + (list "default" target))) + (change-directory work-area) + (set! keyvals (keys:target->keyval keys target)) + ;; apply pre-overrides before other variables. The pre-override vars must not + ;; clobbers things from the official sources such as megatest.config and runconfigs.config + (if (string? set-vars) + (let ((varpairs (string-split set-vars ","))) + (debug:print 4 "varpairs: " varpairs) + (map (lambda (varpair) + (let ((varval (string-split varpair "="))) + (if (eq? (length varval) 2) + (let ((var (car varval)) + (val (cadr varval))) + (debug:print 1 "Adding pre-var/val " var " = " val " to the environment") + (setenv var val))))) + varpairs))) + (for-each + (lambda (varval) + (let ((var (car varval)) + (val (cadr varval))) + (if val + (setenv var val) + (begin + (debug:print 0 "ERROR: required variable " var " does not have a valid value. Exiting") + (exit))))) + (list + (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_ITEMPATH" item-path) + (list "MT_RUNNAME" runname) + (list "MT_MEGATEST" megatest) + (list "MT_TARGET" target) + (list "MT_LINKTREE" (configf:lookup (megatest:area-configdat area-dat) "setup" "linktree")) + (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) + + (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) + ;; (change-directory top-path) + ;; Can setup as client for server mode now + ;; (client:setup) + + + ;; environment overrides are done *before* the remaining critical envars. + (alist->env-vars env-ovrd) + (runs:set-megatest-env-vars run-id area-dat inkeys: keys inkeyvals: keyvals) + (set-item-env-vars itemdat) + (save-environment-as-files "megatest") + ;; open-run-close not needed for test-set-meta-info + ;; (tests:set-full-meta-info #f test-id run-id 0 work-area) + ;; (tests:set-full-meta-info test-id run-id 0 work-area) + (tests:set-full-meta-info #f test-id run-id 0 work-area 10) + + (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + (if (args:get-arg "-xterm") + (set! fullrunscript "xterm") + (if (and fullrunscript (not (file-execute-access? fullrunscript))) + (system (conc "chmod ug+x " fullrunscript)))) + ;; We are about to actually kick off the test + ;; so this is a good place to remove the records for + ;; any previous runs + ;; (db:test-remove-steps db run-id testname itemdat) + + (let* ((m (make-mutex)) + (kill-job? #f) + (exit-info (vector #t #t #t 0)) + (job-thread #f) + (keep-going #t) + (runit (lambda () + ;; (let-values + ;; (((pid exit-status exit-code) + ;; (run-n-wait fullrunscript))) + ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f) + ;; Since we should have a clean slate at this time there is no need to do + ;; any of the other stuff that tests:test-set-status! does. Let's just + ;; force RUNNING/n/a + + + ;; (thread-sleep! 0.3) + (tests:test-force-state-status! run-id test-id "RUNNING" "n/a") + (rmt:roll-up-pass-fail-counts run-id test-name item-path "RUNNING" area-dat) + ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here + + ;; if there is a runscript do it first + (if fullrunscript + (let ((pid (process-run fullrunscript))) + (rmt:test-set-top-process-pid run-id test-id pid area-dat) + (let loop ((i 0)) + (let-values + (((pid-val exit-status exit-code) (process-wait pid #t))) + (mutex-lock! m) + (vector-set! exit-info 0 pid) + (vector-set! exit-info 1 exit-status) + (vector-set! exit-info 2 exit-code) + (vector-set! exit-info 3 exit-code) ;; rollup status + (mutex-unlock! m) + (if (eq? pid-val 0) + (begin + (thread-sleep! 2) + (loop (+ i 1))) + ))))) + ;; then, if runscript ran ok (or did not get called) + ;; do all the ezsteps (if any) + (if ezsteps + (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? + (ezstepslst (hash-table-ref/default testconfig "ezsteps" '()))) + (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ... + (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) + ;; if ezsteps was defined then we are sure to have at least one step but check anyway + (if (not (> (length ezstepslst) 0)) + (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length") + (let loop ((ezstep (car ezstepslst)) + (tal (cdr ezstepslst)) + (prevstep #f)) + ;; check exit-info (vector-ref exit-info 1) + (if (vector-ref exit-info 1) + (let ((logpro-used (launch:runstep ezstep run-id test-id exit-info m tal))) + (if (and (steprun-good? logpro-used (vector-ref exit-info 2)) + (not (null? tal))) + (loop (car tal) (cdr tal) stepname))) + (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep)))))))) + (monitorjob (lambda () + (let* ((start-seconds (current-seconds)) + (calc-minutes (lambda () + (inexact->exact + (round + (- + (current-seconds) + start-seconds))))) + (kill-tries 0)) + ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) + ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) + (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) + (let loop ((minutes (calc-minutes))) + (begin + (set! kill-job? (or (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)) + (and runtlim (let* ((run-seconds (- (current-seconds) start-seconds)) + (time-exceeded (> run-seconds runtlim))) + (if time-exceeded + (begin + (debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim) + #t) + #f))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f) + (if kill-job? + (begin + (mutex-lock! m) + ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this + ;; section and the runit section? Or add a loop that tries three times with a 1/4 second + ;; between tries? + (let* ((pid1 (vector-ref exit-info 0)) + (pid2 (rmt:test-get-top-process-pid run-id test-id area-dat)) + (pids (delete-duplicates (filter number? (list pid1 pid2))))) + (if (not (null? pids)) + (begin + (for-each + (lambda (pid) + (handle-exceptions + exn + (begin + (debug:print-info 0 "Unable to kill process with pid " pid ", possibly already killed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (debug:print 0 "WARNING: Request received to kill job " pid) ;; " (attempt # " kill-tries ")") + (debug:print-info 0 "Signal mask=" (signal-mask)) + ;; (if (process:alive? pid) + ;; (begin + (map (lambda (pid-num) + (process-signal pid-num signal/term)) + (process:get-sub-pids pid)) + (thread-sleep! 5) + ;; (if (process:process-alive? pid) + (map (lambda (pid-num) + (handle-exceptions + exn + #f + (process-signal pid-num signal/kill))) + (process:get-sub-pids pid)))) + ;; (debug:print-info 0 "not killing process " pid " as it is not alive")))) + pids) + (tests:test-set-status! run-id test-id "KILLED" "KILLED" (args:get-arg "-m") #f)) + (begin + (debug:print 0 "ERROR: Nothing to kill, pid1=" pid1 ", pid2=" pid2) + (tests:test-set-status! run-id test-id "KILLED" "FAILED TO KILL" (args:get-arg "-m") #f) + ))) + (mutex-unlock! m) + ;; no point in sticking around. Exit now. + (exit))) + (if keep-going + (begin + (thread-sleep! 3) ;; (+ 3 (random 6))) ;; add some jitter to the call home time to spread out the db accesses + (if keep-going + (loop (calc-minutes))))))) + (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f)))) ;; NOTE: Checking twice for keep-going is intentional + (th1 (make-thread monitorjob "monitor job")) + (th2 (make-thread runit "run job"))) + (set! job-thread th2) + (thread-start! th1) + (thread-start! th2) + (thread-join! th2) + (debug:print-info 0 "Megatest exectute of test " test-name ", item path " item-path " complete. Notifying the db ...") + (set! keep-going #f) + (thread-join! th1) + (thread-sleep! 1) ;; give thread th1 a chance to be done TODO: Verify this is needed. At 0.1 I was getting fail to stop, increased to total of 1.1 sec. + (mutex-lock! m) + (let* ((item-path (item-list->path itemdat)) + ;; only state and status needed - use lazy routine + (testinfo (rmt:get-testinfo-state-status run-id test-id area-dat))) + ;; Am I completed? + (if (member (db:test-get-state testinfo) '("REMOTEHOSTSTART" "RUNNING")) ;; NOTE: It should *not* be REMOTEHOSTSTART but for reasons I don't yet understand it sometimes gets stuck in that state ;; (not (equal? (db:test-get-state testinfo) "COMPLETED")) + (let ((new-state (if kill-job? "KILLED" "COMPLETED") ;; (if (eq? (vector-ref exit-info 2) 0) ;; exited with "good" status + ;; "COMPLETED" + ;; (db:test-get-state testinfo))) ;; else preseve the state as set within the test + ) + (new-status (cond + ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run + ((eq? (vector-ref exit-info 3) 0) + ;; if the current status is AUTO then defer to the calculated value (i.e. leave this AUTO) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS")) + ((eq? (vector-ref exit-info 3) 1) "FAIL") + ((eq? (vector-ref exit-info 3) 2) + ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN) + (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) + (else "FAIL")))) ;; (db:test-get-status testinfo))) + (debug:print-info 1 "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (vector-ref exit-info 1) " and rollup-status of " (vector-ref exit-info 3)) + (tests:test-set-status! run-id + test-id + new-state + new-status + (args:get-arg "-m") #f) + ;; need to update the top test record if PASS or FAIL and this is a subtest + ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status! + )) + ;; for automated creation of the rollup html file this is a good place... + ;; (if (and (not (equal? item-path "")) + ;; (< (random (rmt:get-count-tests-running-for-testname run-id test-name)) 5)) + (tests:summarize-items run-id test-id test-name #f) + (tests:summarize-test run-id test-id)) ;; don't force - just update if no + (mutex-unlock! m) + (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " + work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n") + (if (not (vector-ref exit-info 1)) + (exit 4))))))) + +(define (launch:read-cached-config) + (if (get-environment-variable "MT_CMDINFO") ;; we are inside a test - do not reprocess configs + (let ((alistconfig (conc (get-environment-variable "MT_LINKTREE") "/" + (get-environment-variable "MT_TARGET") "/" + (get-environment-variable "MT_RUNNAME") "/" + ".megatest.cfg"))) + (if (file-exists? alistconfig) + (list (configf:read-alist alistconfig) + (get-environment-variable "MT_RUN_AREA_HOME")) + #f)) + #f)) + +(define (launch:read-megatest-config toppath) + (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))) + (if runname (setenv "MT_RUNNAME" runname)) + (find-and-read-config + (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config") + environ-patt: "env-override" + given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") + pathenvvar: "MT_RUN_AREA_HOME"))) + +;; set up the very basics needed for doing anything here. +(define (launch:setup-for-run area-dat #!key (force #f)) + ;; would set values for KEYS in the environment here for better support of env-override but + ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to + ;; pass on that idea for now + ;; special case + (let ((configdat (megatest:area-configdat area-dat))) + (if (or force (not (hash-table? configdat))) ;; no need to re-open on every call + (let* ((newconfiginfo (or (launch:read-cached-config) ;; no config cached - give up + (launch:read-megatest-config (megatest:area-path area-dat)))) + (configdat (car newconfiginfo)) + (toppath (cadr newconfiginfo))) + (megatest:area-configinfo-set! area-dat newconfiginfo) + (megatest:area-configdat-set! area-dat configdat) + (megatest:area-path-set! area-dat toppath) + (let* ((tmptransport (configf:lookup configdat "server" "transport")) + (transport (if tmptransport (string->symbol tmptransport) 'http))) + (if (member transport '(http rpc nmsg)) + (megatest:area-transport-set! area-dat transport) + (begin + (debug:print 0 "ERROR: Unrecognised transport " transport) + (exit)))) + (let ((linktree (configf:lookup configdat "setup" "linktree"))) ;; link tree is critical + (if linktree + (if (not (file-exists? linktree)) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Something went wrong when trying to create linktree dir at " linktree) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (exit 1)) + (create-directory linktree #t)))) + (begin + (debug:print 0 "ERROR: linktree not defined in [setup] section of megatest.config") + (exit 1))) + (if linktree + (let ((dbdir (conc linktree "/.db"))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: failed to create the " dbdir " area for your database files") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))) + (if (not (directory-exists? dbdir))(create-directory dbdir))) + (setenv "MT_LINKTREE" linktree)) + (begin + (debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section") + (exit 1))) + (if (and toppath + (directory-exists? toppath)) + (setenv "MT_RUN_AREA_HOME" toppath) + (begin + (debug:print 0 "ERROR: failed to find the top path to your Megatest area.") + (exit 1)))) + toppath)))) + +(define (launch:cache-config area-dat) + ;; if we have a linktree and -runtests and -target and the directory exists dump the config + ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg + (let ((configdat (megatest:area-configdat area-dat))) + (if (and configdat + (args:get-arg "-runtests")) + (let* ((linktree (get-environment-variable "MT_LINKTREE")) + (target (common:args-get-target)) + (runname (or (args:get-arg "-runname") + (args:get-arg ":runname"))) + (fulldir (conc linktree "/" + target "/" + runname))) + (debug:print-info 0 "Have -runtests with target=" target ", runname=" runname ", fulldir=" fulldir) + (if (file-exists? linktree) ;; can't proceed without linktree + (begin + (if (not (file-exists? fulldir)) + (create-directory fulldir #t)) ;; need to protect with exception handler + (if (and target + runname + (file-exists? fulldir)) + (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) + (targfile (conc fulldir "/.megatest.cfg"))) + (debug:print-info 0 "Caching megatest.config in " fulldir "/.megatest.cfg") + (configf:write-alist configdat tmpfile) + (system (conc "ln -sf " tmpfile " " targfile)) + )))))))) + +(define (get-best-disk confdat) + (let* ((disks (hash-table-ref/default confdat "disks" #f)) + (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) + (string->number (or m "10000"))))) + (if disks + (let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb + (if res + (cdr res) + (begin + (if (common:low-noise-print 20 "no valid disks") + (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!")) + (exit 1))))))) + +;; Desired directory structure: +;; +;; - - -. +;; | +;; v +;; - - -|- +;; +;; dir stored in test is: +;; +;; - - [ - ] +;; +;; All log file links should be stored relative to the top of link path +;; +;; - [ - ] +;; +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat area-dat #!key (remtries 2)) + (let* ((configdat (megatest:area-configdat area-dat)) + (item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it + (runname (if (string? run-info) ;; if we pass in a string as run-info use it as run-name. + run-info + (db:get-value-by-header (db:get-rows run-info) + (db:get-header run-info) + "runname"))) + ;; convert back to db: from rdb: - this is always run at server end + (target (string-intersperse (map cadr keyvals) "/")) + + (not-iterated (equal? "" item-path)) + + ;; all tests are found at /test-base or /test-base + (testtop-base (conc target "/" runname "/" testname)) + (test-base (conc testtop-base (if not-iterated "" "/") item-path)) + + ;; nb// if itempath is not "" then it is prefixed with "/" + (toptest-path (conc disk-path "/" testtop-base)) + (test-path (conc disk-path "/" test-base)) + + ;; ensure this exists first as links to subtests must be created there + (linktree (let ((rd (config-lookup configdat "setup" "linktree"))) + (if rd rd (conc (megatest:area-path area-dat) "/runs")))) + + (lnkbase (conc linktree "/" target "/" runname)) + (lnkpath (conc lnkbase "/" testname)) + (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)) + (lnktarget (conc lnkpath "/" item-path))) + + ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical + ;; rundir shortdir + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path area-dat) + + (debug:print 2 "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path) + (if (not (file-exists? linktree)) + (begin + (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) + (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) + ;; create the directory for the tests dir links, this is needed no matter what... + (if (and (not (directory-exists? lnkbase)) + (not (file-exists? lnkbase))) + (handle-exceptions + exn + (begin + (debug:print "ERROR: Problem creating linktree base at " lnkbase) + (print-error-message exn (current-error-port))) + (create-directory lnkbase #t))) + + ;; update the toptest record with its location rundir, cache the path + ;; This wass highly inefficient, one db write for every subtest, potentially + ;; thousands of unnecessary updates, cache the fact it was set and don't set it + ;; again. + + ;; Now create the link from the test path to the link tree, however + ;; if the test is iterated it is necessary to create the parent path + ;; to the iteration. use pathname-directory to trim the path by one + ;; level + (if (not not-iterated) ;; i.e. iterated + (let ((iterated-parent (pathname-directory (conc lnkpath "/" item-path)))) + (debug:print-info 2 "Creating iterated parent " iterated-parent) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory iterated-parent #t)))) + + (if (symbolic-link? lnkpath) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (delete-file lnkpath))) + + (if (not (or (file-exists? lnkpath) + (symbolic-link? lnkpath))) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-symbolic-link toptest-path lnkpath))) + + ;; NB - This was not working right - some top tests are not getting the path set!!! + ;; + ;; Do the setting of this record after the paths are created so that the shortdir can + ;; be set to the real directory location. This is safer for future clean up if the link + ;; tree is damaged or lost. + ;; + (if (not (hash-table-ref/default *toptest-paths* testname #f)) + (let* ((testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) ;; run-id testname item-path)) + (curr-test-path (if testinfo ;; (filedb:get-path *fdb* + ;; (db:get-path dbstruct + ;; (rmt:sdb-qry 'getstr + (db:test-get-rundir testinfo) ;; ) ;; ) + #f))) + (hash-table-set! *toptest-paths* testname curr-test-path) + ;; NB// Was this for the test or for the parent in an iterated test? + (rmt:general-call 'test-set-rundir-shortdir run-id lnkpath + (if (file-exists? lnkpath) + (resolve-pathname lnkpath) + lnkpath) + testname "" area-dat) + ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path) + (if (or (not curr-test-path) + (not (directory-exists? toptest-path))) + (begin + (debug:print-info 2 "Creating " toptest-path " and link " lnkpath) + (handle-exceptions + exn + #f ;; don't care to catch and deal with errors here for now. + (create-directory toptest-path #t)) + (hash-table-set! *toptest-paths* testname toptest-path))))) + + ;; The toptest path has been created, the link to the test in the linktree has + ;; been created. Now, if this is an iterated test the real test dir must be created + (if (not not-iterated) ;; this is an iterated test + (begin ;; (let ((lnktarget (conc lnkpath "/" item-path))) + (debug:print 2 "Setting up sub test run area") + (debug:print 2 " - creating run area in " test-path) + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to create directory " test-path ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit 1)) + (create-directory test-path #t)) + (debug:print 2 + " - creating link from: " test-path "\n" + " to: " lnktarget) + + ;; If there is already a symlink delete it and recreate it. + (handle-exceptions + exn + (begin + (debug:print 0 "ERROR: Failed to re-create link " lnktarget ((condition-property-accessor 'exn 'message) exn) ", exiting") + (exit)) + (if (symbolic-link? lnktarget) (delete-file lnktarget)) + (if (not (file-exists? lnktarget)) (create-symbolic-link test-path lnktarget))))) + + (if (not (directory? test-path)) + (create-directory test-path #t)) ;; this is a hack, I don't know why out of the blue this path does not exist sometimes + + (if (and test-src-path (directory? test-path)) + (begin + (let* ((ovrcmd (let ((cmd (config-lookup configdat "setup" "testcopycmd"))) + (if cmd + ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH + (string-substitute "TEST_TARG_PATH" test-path + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) + #f))) + (cmd (if ovrcmd + ovrcmd + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) + (status (system cmd))) + (if (not (eq? status 0)) + (debug:print 2 "ERROR: problem with running \"" cmd "\""))) + (list lnkpathf lnkpath )) + (if (and test-src-path (> remtries 0)) + (begin + (debug:print 0 "ERROR: Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) + ;; + (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) + (list #f #f))))) + +;; 1. look though disks list for disk with most space +;; 2. create run dir on disk, path name is meaningful +;; 3. create link from run dir to megatest runs area +;; 4. remotely run the test on allocated host +;; - could be ssh to host from hosts table (update regularly with load) +;; - could be netbatch +;; (launch-test db (cadr status) test-conf)) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params area-dat) + (let ((toppath (megatest:area-path area-dat)) + (configdat (megatest:area-configdat area-dat))) + (change-directory toppath) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (list ;; (list "MT_TEST_RUN_DIR" work-area) + (list "MT_RUN_AREA_HOME" toppath) + (list "MT_TEST_NAME" test-name) + ;; (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + ;; (list "MT_TARGET" mt_target) + )) + (let* ((useshell (let ((ush (config-lookup configdat "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (launcher (config-lookup configdat "jobtools" "launcher")) + (runscript (config-lookup test-conf "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big + (diskspace (config-lookup test-conf "requirements" "diskspace")) + (memory (config-lookup test-conf "requirements" "memory")) + (hosts (config-lookup configdat "jobtools" "workhosts")) + (remote-megatest (config-lookup configdat "setup" "executable")) + (run-time-limit (or (configf:lookup test-conf "requirements" "runtimelim") + (configf:lookup configdat "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (item-path (item-list->path itemdat)) + (test-sig (conc test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id area-dat)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + (setenv "MT_ITEMPATH" item-path) + (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:roll-up-pass-fail-counts run-id test-name item-path "LAUNCHED" area-dat) + (set! diskpath (get-best-disk configdat)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + (list 'transport (conc (megatest:area-transport area-dat))) ;; + ;; (list 'serverinf *server-info*) + (list 'toppath toppath) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default configdat "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir + (cond + ((and launcher hosts) ;; must be using ssh hostname + (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 "fullcmd: " fullcmd) + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default configdat "env-override" '()))) + (testprevvals (alist->env-vars + (hash-table-ref/default test-conf "pre-launch-env-overrides" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup configdat "setup" "launchwait") "no") #f #t)) + (launch-results (apply (if launchwait + cmd-run-with-stderr->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) + (if (not launchwait) ;; give the OS a little time to allow the process to start + (thread-sleep! 0.01)) + (with-output-to-file "mt_launch.log" + (lambda () + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 "Launching completed, updating db") + (debug:print 2 "Launch results: " launch-results) + (if (not launch-results) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + launch-results)) + (change-directory toppath)) + ;; added paren below after refactoring above routine. must have missed something? + ) + ADDED loadwatch/launch-many Index: loadwatch/launch-many ================================================================== --- /dev/null +++ loadwatch/launch-many cannot compute difference between binary files ADDED loadwatch/queuefeeder Index: loadwatch/queuefeeder ================================================================== --- /dev/null +++ loadwatch/queuefeeder cannot compute difference between binary files ADDED loadwatch/queuefeeder-server Index: loadwatch/queuefeeder-server ================================================================== --- /dev/null +++ loadwatch/queuefeeder-server cannot compute difference between binary files ADDED megatest-version.scm-baseline Index: megatest-version.scm-baseline ================================================================== --- /dev/null +++ megatest-version.scm-baseline @@ -0,0 +1,7 @@ +;; Always use two or four digit decimal +;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. + +(declare (unit megatest-version)) + +(define megatest-version 1.6009) + ADDED megatest-version.scm-merge Index: megatest-version.scm-merge ================================================================== --- /dev/null +++ megatest-version.scm-merge @@ -0,0 +1,7 @@ +;; Always use two or four digit decimal +;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. + +(declare (unit megatest-version)) + +(define megatest-version 1.6013) + ADDED megatest-version.scm-original Index: megatest-version.scm-original ================================================================== --- /dev/null +++ megatest-version.scm-original @@ -0,0 +1,7 @@ +;; Always use two or four digit decimal +;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00.. + +(declare (unit megatest-version)) + +(define megatest-version 1.6014) + Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -16,11 +16,11 @@ ;; fake out readline usage of toplevel-command (define (toplevel-command . a) #f) (use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 readline apropos json http-client directory-utils rpc ;; (srfi 18) extras) - http-client srfi-18 extras format) ;; zmq extras) + http-client srfi-18 extras format posix-utils) ;; pathname-expand zmq extras) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) ADDED odboard Index: odboard ================================================================== --- /dev/null +++ odboard cannot compute difference between binary files ADDED refdb Index: refdb ================================================================== --- /dev/null +++ refdb cannot compute difference between binary files Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -9,10 +9,12 @@ ;; PURPOSE. ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking (srfi 18) posix-extras directory-utils) +;; pathname-expand) ;; pathname-expand will be needed in switch to chicken 4.10 + (import (prefix sqlite3 sqlite3:)) (declare (unit runs)) (declare (uses db)) (declare (uses common)) @@ -458,21 +460,22 @@ (let ((reglen (configf:lookup *configdat* "setup" "runqueue"))) (if (> (length (hash-table-keys test-records)) 0) (let* ((keep-going #t) (run-queue-retries 5) (th1 (make-thread (lambda () - (handle-exceptions - exn - (begin - (print-call-chain (current-error-port)) - (debug:print 0 "ERROR: failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn)) - (if (> run-queue-retries 0) - (begin - (set! run-queue-retries (- run-queue-retries 1)) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))) - (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))) - "runs:run-tests-queue")) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 "ERROR: failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)) + (if (> run-queue-retries 0) + (begin + (set! run-queue-retries (- run-queue-retries 1)) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))) + (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)) + "runs:run-tests-queue"))) (th2 (make-thread (lambda () ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ... (let ((run-ids (rmt:get-all-run-ids))) (for-each (lambda (run-id) (if keep-going ADDED setup.sh Index: setup.sh ================================================================== --- /dev/null +++ setup.sh @@ -0,0 +1,1 @@ +export PATH=$PWD/bin:$PATH ADDED tasks.scm.saved Index: tasks.scm.saved ================================================================== --- /dev/null +++ tasks.scm.saved @@ -0,0 +1,793 @@ +;; Copyright 2006-2012, Matthew Welland. +;; +;; This program is made available under the GNU GPL version 2.0 or +;; greater. See the accompanying file COPYING for details. +;; +;; This program is distributed WITHOUT ANY WARRANTY; without even the +;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. + +;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') + +(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) +(import (prefix sqlite3 sqlite3:)) + +(declare (unit tasks)) +(declare (uses db)) +(declare (uses rmt)) +(declare (uses common)) + +(include "task_records.scm") + +;;====================================================================== +;; Tasks db +;;====================================================================== + +;; wait up to aprox n seconds for a journal to go away +;; +(define (tasks:wait-on-journal path n #!key (remove #f)(waiting-msg #f)) + (if (not (string? path)) + (debug:print 0 "ERROR: Called tasks:wait-on-journal with path=" path " (not a string)") + (let ((fullpath (conc path "-journal"))) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)) + (debug:print 0 "tasks:wait-on-journal failed. Continuing on, you can ignore this call-chain") + #t) ;; if stuff goes wrong just allow it to move on + (let loop ((journal-exists (file-exists? fullpath)) + (count n)) ;; wait ten times ... + (if journal-exists + (begin + (if (and waiting-msg + (eq? (modulo n 30) 0)) + (debug:print 0 waiting-msg)) + (if (> count 0) + (begin + (thread-sleep! 1) + (loop (file-exists? fullpath) + (- count 1))) + (begin + (if remove (system (conc "rm -rf " fullpath))) + #f))) + #t)))))) + +(define (tasks:get-task-db-path) + (let* ((linktree (configf:lookup *configdat* "setup" "linktree")) + (dbpath (conc linktree "/.db"))) + dbpath)) + + + +;; If file exists AND +;; file readable +;; ==> open it +;; If file exists AND +;; file NOT readable +;; ==> open in-mem version +;; If file NOT exists +;; ==> open in-mem version +;; +(define (tasks:open-db #!key (numretries 4)) + (if *task-db* + *task-db* + (handle-exceptions + exn + (if (> numretries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)) + (thread-sleep! 1) + (tasks:open-db numretries (- numretries 1))) + (begin + (print-call-chain (current-error-port)) + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " exn=" (condition->list exn)))) + (let* ((dbpath (tasks:get-task-db-path)) + (dbfile (conc dbpath "/monitor.db")) + (avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away + (exists (file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (mdb (cond ;; what the hek is *toppath* doing here? + ((and (string? *toppath*)(file-write-access? *toppath*)) + (sqlite3:open-database dbfile)) + ((file-read-access? dbpath) (sqlite3:open-database dbfile)) + (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) + (handler (make-busy-timeout 36000))) + (if (and exists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control + (sqlite3:set-busy-handler! mdb handler) + (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) + ;; (if (or (and (not exists) + ;; (file-write-access? *toppath*)) + ;; (not (file-read-access? dbpath))) + ;; (begin + ;; + ;; TASKS QUEUE MOVED TO main.db + ;; + ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, + ;; action TEXT DEFAULT '', + ;; owner TEXT, + ;; state TEXT DEFAULT 'new', + ;; target TEXT DEFAULT '', + ;; name TEXT DEFAULT '', + ;; testpatt TEXT DEFAULT '', + ;; keylock TEXT, + ;; params TEXT, + ;; creation_time TIMESTAMP, + ;; execution_time TIMESTAMP);") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY, + pid INTEGER, + start_time TIMESTAMP, + last_update TIMESTAMP, + hostname TEXT, + username TEXT, + CONSTRAINT monitors_constraint UNIQUE (pid,hostname));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS servers (id INTEGER PRIMARY KEY, + pid INTEGER, + interface TEXT, + hostname TEXT, + port INTEGER, + pubport INTEGER, + start_time TIMESTAMP, + priority INTEGER, + state TEXT, + mt_version TEXT, + heartbeat TIMESTAMP, + transport TEXT, + run_id INTEGER);") + ;; CONSTRAINT servers_constraint UNIQUE (pid,hostname,port));") + (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS clients (id INTEGER PRIMARY KEY, + server_id INTEGER, + pid INTEGER, + hostname TEXT, + cmdline TEXT, + login_time TIMESTAMP, + logout_time TIMESTAMP DEFAULT -1, + CONSTRAINT clients_constraint UNIQUE (pid,hostname));") + + ;)) + (set! *task-db* (cons mdb dbpath)) + *task-db*)))) + +;;====================================================================== +;; Server and client management +;;====================================================================== + +;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname +(define (tasks:hostinfo-get-id vec) (vector-ref vec 0)) +(define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) +(define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) +(define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) +(define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) +(define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) +(define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) + +(define (tasks:server-lock-slot mdb run-id) + (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") + (if (< (tasks:num-in-available-state mdb run-id) 4) + (begin + (tasks:server-set-available mdb run-id) + ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. + (tasks:server-am-i-the-server? mdb run-id)) + #f)) + +;; register that this server may come online (first to register goes though with the process) +(define (tasks:server-set-available mdb run-id) + (sqlite3:execute + mdb + "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) + VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" + (current-process-id) ;; pid + (get-host-name) ;; hostname + -1 ;; port + -1 ;; pubport + (random 1000) ;; priority (used a tiebreaker on get-available) + "available" ;; state + (common:version-signature) ;; mt_version + -1 ;; interface + "http" ;; transport + run-id + )) + +(define (tasks:num-in-available-state mdb run-id) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-in-queue) + (set! res num-in-queue)) + mdb + "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available' AND (strftime('%s','now') - start_time) < 30 ;" + run-id) + res)) + +(define (tasks:num-servers-non-zero-running mdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (num-running) + (set! res num-running)) + mdb + "SELECT count(id) FROM servers WHERE run_id != 0 AND state = 'running';") + res)) + +(define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','dbprep','shutting-down') AND (strftime('%s','now') - start_time) > 50 AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" + (conc "defunct" tag) run-id)) + +(define (tasks:server-force-clean-run-record mdb run-id iface port tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" + (conc "defunct" tag) run-id iface port)) + +(define (tasks:server-delete-records-for-this-pid mdb tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" + (conc "defunct" tag) (get-host-name) (current-process-id))) + +(define (tasks:server-delete-record mdb server-id tag) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" + (conc "defunct" tag) server-id) + ;; use this opportuntity to clean out records over one month old or over 10 minutes old with port = -1 (i.e. a never used placeholder) + (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down','dbprep') AND (strftime('%s','now') - start_time) > 2628000;") + (sqlite3:execute mdb "DELETE FROM servers WHERE state like 'defunct%' AND port=-1 AND (strftime('%s','now') - start_time) > 600;") + ) + +(define (tasks:server-set-state! mdb server-id state) + (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) + +(define (tasks:server-set-interface-port mdb server-id interface port) + (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id)) + +;; Get random port not used in long time +;; +(define (tasks:server-get-next-port mdb) + (let* ((lownum 30000) + (highnum 64000) + (used-ports '()) + (get-rand-port (lambda () + (+ lownum (random (- highnum lownum))))) + (port-param (if (and (args:get-arg "-port") + (string->number (args:get-arg "-port"))) + (string->number (args:get-arg "-port")) + #f)) + ;; (config-port (if (and (config-lookup *configdat* "server" "port") + ;; (string->number (config-lookup *configdat* "server" "port"))) + ;; (string->number (config-lookup *configdat* "server" "port")) + ;; #f)) + ) + (sqlite3:for-each-row + (lambda (port) + (set! used-ports (cons port used-ports))) + mdb + "SELECT port FROM servers;") + (cond + ((and port-param res) (if (> res port-param) res port-param)) + (port-param port-param) + ;; ((and config-port res) (if (> res config-port) res config-port)) + ;; (config-port config-port) + (else + (let loop ((port (get-rand-port)) + (remtries 100)) + (if (member port used-ports) + (if (> remtries 0) + (loop (get-rand-port)(- remtries 1)) + (get-rand-port)) + port)))))) + +(define (tasks:server-am-i-the-server? mdb run-id) + (let* ((all (tasks:server-get-servers-vying-for-run-id mdb run-id)) + (first (if (null? all) + (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") + (sqlite3:finalize! mdb) + (exit 1)) + (car (db:get-rows all)))) + (header (db:get-header all)) + (id (db:get-value-by-header first header "id")) + (hostname (db:get-value-by-header first header "hostname")) + (pid (db:get-value-by-header first header "pid")) + (priority (db:get-value-by-header first header "priority"))) + (debug:print 0 "INFO: am-i-the-server got record " first) + ;; for now a basic check. add tiebreaking by priority later + (if (and (equal? hostname (get-host-name)) + (equal? pid (current-process-id))) + id + #f))) + +;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname") +;; to extract info from the structure returned +;; +(define (tasks:server-get-servers-vying-for-run-id mdb run-id) + (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time")) + (selstr (string-intersperse header ",")) + (res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (apply vector a b) res))) + mdb + (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running','dbprep') ORDER BY start_time DESC;") + run-id) + (vector header res))) + +(define (tasks:get-server mdb run-id #!key (retries 10)) + (let ((res #f) + (best #f)) + (handle-exceptions + exn + (begin + (print-call-chain (current-error-port)) + (debug:print 0 "WARNING: tasks:get-server db access error.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 0 " for run " run-id) + (print-call-chain (current-error-port)) + (if (> retries 0) + (begin + (debug:print 0 " trying call to tasks:get-server again in 10 seconds") + (thread-sleep! 10) + (tasks:get-server mdb run-id retries: (- retries 0))) + (debug:print 0 "10 tries of tasks:get-server all crashed and burned. Giving up and returning \"no server found\""))) + (sqlite3:for-each-row + (lambda (id interface port pubport transport pid hostname) + (set! res (vector id interface port pubport transport pid hostname))) + mdb + ;; removed: + ;; strftime('%s','now')-heartbeat < 10 AND mt_version = ? + "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers + WHERE run_id=? AND state='running' + ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id) + res))) + +(define (tasks:server-running-or-starting? mdb run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + mdb ;; NEEDS dbprep ADDED + "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) + res)) + +(define (tasks:server-running? mdb run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (id) + (set! res id)) + mdb ;; NEEDS dbprep ADDED + "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id) + res)) + +(define (tasks:need-server run-id) + (let ((forced (configf:lookup *configdat* "server" "required")) + (maxqry (cdr (rmt:get-max-query-average run-id))) + (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10")))) + (cond + (forced + (if (common:low-noise-print 60 run-id "server required is set") + (debug:print-info 0 "Server required is set, starting server.")) + #t) + ((> maxqry threshold) + (if (common:low-noise-print 60 run-id "Max query time execeeded") + (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, starting server.")) + #t) + (else + #f)))) + +;; try to start a server and wait for it to be available +;; +(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries) + ;; ensure a server is running for this run + (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)) + (delay-time 0)) + (if (and (not server-dat) + (< delay-time delay-max-tries)) + (begin + (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id) + (debug:print 0 "Try starting server for run-id " run-id)) + (server:kind-run run-id) + (thread-sleep! (min delay-time 5)) + (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1)))))) + +(define (tasks:get-all-servers mdb) + (let ((res '())) + (sqlite3:for-each-row + (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) + ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 + (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) + mdb + "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") + res)) + +;; no elegance here ... +;; +(define (tasks:kill-server hostname pid) + (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname) + (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") + (system (conc "nbfake kill " pid)) + (unsetenv "TARGETHOST_LOGF") + (unsetenv "TARGETHOST")) + +;; look up a server by run-id and send it a kill, also delete the record for that server +;; +(define (tasks:kill-server-run-id run-id #!key (tag "default")) + (let* ((tdbdat (tasks:open-db)) + (sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) + (if sdat + (let ((hostname (vector-ref sdat 6)) + (pid (vector-ref sdat 5)) + (server-id (vector-ref sdat 0))) + (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed") + (debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid) + (tasks:kill-server hostname pid) + (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) ) + (debug:print-info 0 "No server found for run-id " run-id ", nothing to kill")) + ;; (sqlite3:finalize! tdb) + )) + +;;====================================================================== +;; M O N I T O R S +;;====================================================================== + +(define (tasks:remove-monitor-record mdb) + (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name))) + +(define (tasks:get-monitors mdb) + (let ((res '())) + (sqlite3:for-each-row + (lambda (a . rem) + (set! res (cons (apply vector a rem) res))) + mdb + "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;") + (reverse res) + )) + +(define (tasks:monitors->text-table monitors) + (let ((fmtstr "~4a~8a~20a~20a~10a~10a")) + (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n" + (string-intersperse + (map (lambda (monitor) + (format #f fmtstr + (tasks:monitor-get-id monitor) + (tasks:monitor-get-pid monitor) + (tasks:monitor-get-start_time monitor) + (tasks:monitor-get-last_update monitor) + (tasks:monitor-get-hostname monitor) + (tasks:monitor-get-username monitor))) + monitors) + "\n")))) + +;; update the last_update field with the current time and +;; if any monitors appear dead, remove them +(define (tasks:monitors-update mdb) + (sqlite3:execute mdb "UPDATE monitors SET last_update=strftime('%s','now') WHERE pid=? AND hostname=?;" + (current-process-id) + (get-host-name)) + (let ((deadlist '())) + (sqlite3:for-each-row + (lambda (id pid host last-update delta) + (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago") + (set! deadlist (cons id deadlist))) + mdb + "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;") + (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');"))) + ) +(define (tasks:register-monitor db mdb) + (let* ((pid (current-process-id)) + (hostname (get-host-name)) + (userinfo (user-information (current-user-id))) + (username (car userinfo))) + (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username) + (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);" + pid hostname username))) + +(define (tasks:get-num-alive-monitors mdb) + (let ((res 0)) + (sqlite3:for-each-row + (lambda (count) + (set! res count)) + mdb + "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;" + (car (user-information (current-user-id)))) + res)) + +;; +(define (tasks:start-monitor db mdb) + (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more + (debug:print-info 1 "Not starting monitor, already have more than two running") + (let* ((megatestdb (conc *toppath* "/megatest.db")) + (monitordbf (conc (configf:lookup *configdat* "setup" "linktree") "/.db/monitor.db")) + (last-db-update 0)) ;; (file-modification-time megatestdb))) + (task:register-monitor mdb) + (let loop ((count 0) + (next-touch 0)) ;; next-touch is the time where we need to update last_update + ;; if the db has been modified we'd best look at the task queue + (let ((modtime (file-modification-time megatestdbpath ))) + (if (> modtime last-db-update) + (tasks:process-queue db mdb last-db-update megatestdb next-touch)) + ;; WARNING: Possible race conditon here!! + ;; should this update be immediately after the task-get-action call above? + (if (> (current-seconds) next-touch) + (begin + (tasks:monitors-update mdb) + (loop (+ count 1)(+ (current-seconds) 240))) + (loop (+ count 1) next-touch))))))) + +;;====================================================================== +;; T A S K S Q U E U E +;; +;; NOTE:: These operate on task_queue which is in main.db +;; +;;====================================================================== + +;; NOTE: It might be good to add one more layer of checking to ensure +;; that no task gets run in parallel. + + + +;; register a task +(define (tasks:add dbstruct action owner target runname testpatt params) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time) + VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" + action + owner + target + runname + testpatt + (if params params ""))))) + +(define (keys:key-vals-hash->target keys key-params) + (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) ""))) + (if (> (length keys) 1) + (for-each (lambda (key) + (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) "")))) + (cdr keys))) + tmp)) + +;; for use from the gui, not ported +;; +;; (define (tasks:add-from-params mdb action keys key-params var-params) +;; (let ((target (keys:key-vals-hash->target keys key-params)) +;; (owner (car (user-information (current-user-id)))) +;; (runname (hash-table-ref/default var-params "runname" #f)) +;; (testpatts (hash-table-ref/default var-params "testpatts" "%")) +;; (params (hash-table-ref/default var-params "params" ""))) +;; (tasks:add mdb action owner target runname testpatts params))) + +;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old +;; +(define (tasks:snag-a-task dbstruct) + (let ((res #f) + (keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id)))))) + (db:with-db + dbstruct #f #t + (lambda (db) + ;; first randomly set a new to pid-hostname-hostname + (sqlite3:execute + db + "UPDATE tasks_queue SET keylock=? WHERE id IN + (SELECT id FROM tasks_queue + WHERE state='new' OR + (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR + state='reset' + ORDER BY RANDOM() LIMIT 1);" keytxt) + + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (apply vector id rem))) + db + "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt) + (if res ;; yep, have work to be done + (begin + (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;" + (tasks:task-get-id res)) + res) + #f))))) + +(define (tasks:reset-stuck-tasks dbstruct) + (let ((res '())) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:for-each-row + (lambda (id delta) + (set! res (cons id res))) + db + "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;") + (sqlite3:execute + db + (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');") + ))))) + +;; return all tasks in the tasks_queue table +;; +(define (tasks:get-tasks dbstruct types states) + (let ((res '())) + (db:with-db + dbstruct #f #f + (lambda (db) + (sqlite3:for-each-row + (lambda (id . rem) + (set! res (cons (apply vector id rem) res))) + db + (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time + FROM tasks_queue " + ;; WHERE + ;; state IN " statesstr " AND + ;; action IN " actionsstr + " ORDER BY creation_time DESC;")) + res)))) + +;; remove tasks given by a string of numbers comma separated +(define (tasks:remove-queue-entries dbstruct task-ids) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");"))))) + +(define (tasks:process-queue dbstruct) + (let* ((task (tasks:snag-a-task dbstruct)) + (action (if task (tasks:task-get-action task) #f))) + (if action (print "tasks:process-queue task: " task)) + (if action + (case (string->symbol action) + ((run) (tasks:start-run dbstruct task)) + ((remove) (tasks:remove-runs dbstruct task)) + ((lock) (tasks:lock-runs dbstruct task)) + ;; ((monitor) (tasks:start-monitor db task)) + ((rollup) (tasks:rollup-runs dbstruct task)) + ((updatemeta)(tasks:update-meta dbstruct task)) + ((kill) (tasks:kill-monitors dbstruct task)))))) + +(define (tasks:tasks->text tasks) + (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a")) + (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n" + (string-intersperse + (map (lambda (task) + (format #f fmtstr + (tasks:task-get-id task) + (tasks:task-get-action task) + (tasks:task-get-owner task) + (tasks:task-get-state task) + (tasks:task-get-target task) + (tasks:task-get-name task) + (tasks:task-get-test task) + ;; (tasks:task-get-item task) + (tasks:task-get-params task))) + tasks) "\n")))) + +(define (tasks:set-state dbstruct task-id state) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" + state + task-id)))) + +;;====================================================================== +;; Access using task key (stored in params; (hash-table->alist flags) hostname pid +;;====================================================================== + +(define (tasks:param-key->id dbstruct task-params) + (db:with-db + dbstruct #f #f + (lambda (db) + (handle-exceptions + exn + #f + (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;" + task-params))))) + +(define (tasks:set-state-given-param-key dbstruct param-key new-state) + (db:with-db + dbstruct #f #t + (lambda (db) + (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key)))) + +(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt) + (db:with-db + dbstruct #f #f + (lambda (db) + (handle-exceptions + exn + '() + (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE + params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + param-key state-patt action-patt test-patt))))) + + +(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt) + ;; (handle-exceptions + ;; exn + ;; '() + ;; (sqlite3:first-row + (let ((db (db:delay-if-busy (db:get-db dbstruct #f))) + (res '())) + (sqlite3:for-each-row + (lambda (a . b) + (set! res (cons (cons a b) res))) + db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue + WHERE + target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;" + target run-name state-patt action-patt test-patt) + res)) ;; ) + +;; kill any runner processes (i.e. processes handling -runtests) that match target/runname +;; +;; do a remote call to get the task queue info but do the killing as self here. +;; +(define (tasks:kill-runner target run-name) + (let ((records (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests")) + (hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string + (if (null? records) + (debug:print 0 "No run launching processes found for " target " / " run-name) + (debug:print 0 "Found " (length records) " run(s) to kill.")) + (for-each + (lambda (record) + (let* ((param-key (list-ref record 8)) + (match-dat (string-search hostpid-rx param-key))) + (if match-dat + (let ((hostname (cadr match-dat)) + (pid (string->number (caddr match-dat)))) + (debug:print 0 "Sending SIGINT to process " pid " on host " hostname) + (if (equal? (get-host-name) hostname) + (if (process:alive? pid) + (begin + (handle-exceptions + exn + (begin + (debug:print 0 "Kill of process " pid " on host " hostname " failed.") + (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) + #t) + (process-signal pid signal/int) + (thread-sleep! 5) + (if (process:alive? pid) + (process-signal pid signal/kill))))) + ;; (call-with-environment-variables + (let ((old-targethost (getenv "TARGETHOST"))) + (setenv "TARGETHOST" hostname) + (setenv "TARGETHOST_LOGF" "server-kills.log") + (system (conc "nbfake kill " pid)) + (if old-targethost (setenv "TARGETHOST" old-targethost)) + (unsetenv "TARGETHOST") + (unsetenv "TARGETHOST_LOGF")))) + (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in main.db")))) + records))) + +;; (define (tasks:start-run dbstruct mdb task) +;; (let ((flags (make-hash-table))) +;; (hash-table-set! flags "-rerun" "NOT_STARTED") +;; (if (not (string=? (tasks:task-get-params task) "")) +;; (hash-table-set! flags "-setvars" (tasks:task-get-params task))) +;; (print "Starting run " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:run-tests db +;; (tasks:task-get-target task) +;; (tasks:task-get-name task) +;; (tasks:task-get-test task) +;; (tasks:task-get-item task) +;; (tasks:task-get-owner task) +;; flags) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) +;; +;; (define (tasks:rollup-runs db mdb task) +;; (let* ((flags (make-hash-table)) +;; (keys (db:get-keys db)) +;; (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) +;; ;; (hash-table-set! flags "-rerun" "NOT_STARTED") +;; (print "Starting rollup " task) +;; ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY +;; (runs:rollup-run db +;; keys +;; keyvals +;; (tasks:task-get-name task) +;; (tasks:task-get-owner task)) +;; (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) + ADDED testreadline Index: testreadline ================================================================== --- /dev/null +++ testreadline cannot compute difference between binary files ADDED testreadline-old Index: testreadline-old ================================================================== --- /dev/null +++ testreadline-old cannot compute difference between binary files ADDED testreadline-old.scm Index: testreadline-old.scm ================================================================== --- /dev/null +++ testreadline-old.scm @@ -0,0 +1,10 @@ +(use readline apropos) +(import readline) +(import apropos) +(gnu-history-install-file-manager + (string-append + (or (get-environment-variable "HOME") ".") "/.megatest_history")) +(current-input-port (make-gnu-readline-port "megatest> ")) +;; (current-input-port (make-readline-port)) +;; (install-history-file #f "/.csi.history") +(repl) ADDED testreadline.scm Index: testreadline.scm ================================================================== --- /dev/null +++ testreadline.scm @@ -0,0 +1,7 @@ +(use readline apropos) +(import readline) +(import apropos) +(import csi) +(current-input-port (make-readline-port)) +(install-history-file #f "/.csi.history") +(repl) Index: tests/fullrun/configs/mt_include_2.config ================================================================== --- tests/fullrun/configs/mt_include_2.config +++ tests/fullrun/configs/mt_include_2.config @@ -1,2 +1,2 @@ [disks] -disk0 #{getenv MT_RUN_AREA_HOME}/tmp/mt_runs +disk0 #{scheme (create-directory "#{getenv MT_RUN_AREA_HOME}/tmp/mt_runs" #t)}