@@ -1355,12 +1355,20 @@ (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") (rmt:set-var key (current-seconds)) (rmt:find-and-mark-incomplete run-id #f)))) - +(defstruct launch:ajt + (vars '()) + (exekey #f) + (host-type #f) + (test-sig #f) + (cmdline #f)) +;; append vars +(define (launch:ajt-add-vars dat vars) + (launch:ajt-vars-set! dat (append (launch:ajt-vars dat) vars))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host @@ -1370,31 +1378,38 @@ (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* (;; locking code removed from here commented out and pasted at end of file (item-path (item-list->path itemdat)) (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - ;; launch-mode will be 'adjutant or 'normal - (launch-mode (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal")))) + ;; launcher-mode will be 'adjutant or 'normal + (launcher-mode (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal"))) + (ajtdat (make-launch:ajt))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0))) (if (> launch-delay delta) (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) (thread-sleep! (- launch-delay delta)) (loop (- (current-seconds) *last-launch*) launch-delay)))) (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) + (let ((var-list (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat))) + ;; consolidate this code with the code in megatest.scm for + ;; "-execute", *maybe* - the longer they are set the longer + ;; each launch takes (must be non-overlapping with the vars) + (alist->env-vars var-list) + ;; the var-list into the ajtdat adjutant record whether it is needed or not. + (launch:ajt-add-vars ajtdat var-list)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed ;; for tconfig, why do we allow fallback to test-conf? (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) (begin (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") @@ -1432,10 +1447,19 @@ (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '()) (if (configf:lookup *configdat* "misc" "profilesw") (list (configf:lookup *configdat* "misc" "profilesw")) '())))) + ;; save the test-sig in the ajtdat record + (launch:ajt-test-sig-set! ajtdat test-sig) + ;; go ahead and figure out if we have a host-type from the + ;; launcher call above and save it in the ajtdat record + (if (and (eq? launcher-mode 'adjutant) + (list? launcher) + (> (length launcher) 1)) + (launch:ajt-host-type-set! ajtdat (car launcher))) + ;; (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) ;; (if launcher (set! launcher (string-split launcher))) ;; yuk! @@ -1493,64 +1517,92 @@ (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) + ;; save the cmdparms in the ajtdat + (launch:ajt-exekey-set! ajtdat cmdparms) ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + + ;; save the command line for adjutant mode (might never be needed but best to assemble it here) + (launch:ajt-cmdline-set! ajtdat (string-intersperse + (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) (launcher (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) (else (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 *default-log-port* "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 *default-log-port* "fullcmd: " fullcmd) (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + (let* ((env-override-vars (hash-table-ref/default *configdat* "env-override" '())) + (commonprevvals (alist->env-vars env-override-vars)) + (misc-vars (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path)) + itemdat)) + (miscprevvals (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute" + (test-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '())) + (testprevvals (alist->env-vars test-vars)) + ;; Launchwait defaults to true, must override it to turn off wait (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr-and-exitcode->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd)))) + ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. + (launch-results-prev (if (eq? launcher-mode 'adjutant) + '(0 #t) ;; just some fake data to fool downstream but non-applicable code + (apply (if launchwait + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd))))) (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) - (if (not success) + + (launch:ajt-add-vars ajtdat env-override-vars) + (launch:ajt-add-vars ajtdat misc-vars) + (launch:ajt-add-vars ajtdat test-vars) + + ;; if in adjutant mode we register the job in the jobs_queue + ;; then fire off an adjutant runner + ;; + (if (eq? launcher-mode 'adjutant) + (let* ((adjutant-runner-cmd (append (cdr launcher) + (list remote-megatest "-adjutant" + (launch:ajt-host-type ajtdat) + "-start-dir" *toppath*))) + (adj-cmd (conc (string-intersperse (map conc adjutant-runner-cmd) " ") + "&"))) + (rmt:no-sync-add-job + (launch:ajt-host-type ajtdat) + (conc (launch:ajt-vars ajtdat)) + (launch:ajt-exekey ajtdat) + (launch:ajt-cmdline ajtdat)) + (print "adj-cmd: " adj-cmd) + (system adj-cmd) + )) + + (if (not success) (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () @@ -1572,10 +1624,14 @@ (process-signal (current-process-id) signal/kill) )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) + ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + ;; the unlock previously was further up. This seemed wrong as we should not proceed until the + ;; vars have been reset. + (mutex-unlock! *launch-setup-mutex*) launch-results)) (change-directory *toppath*) (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) ;; recover a test where the top controlling mtest may have died