Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1248,10 +1248,57 @@ (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) ;; (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) (list #f #f))))) +;; +(define (launch-test-standalone test-work-dir) + (when (not (directory-exists? test-work-dir)) + (debug:print-error 0 *default-log-port* "Cannot launch. test-work-dir for lauched test does not exist, cannot proceed with launch: "test-work-dir) + (exit 1)) + (change-directory test-work-dir) + (let* ((launch-dat-file (conc test-work-dir "/launch.dat"))) + (if (not (common:file-exists? launch-dat-file)) + ;; error and exit + #f + (let* ((launch-info (with-input-from-file launch-dat-file read)) + (run-id (alist-ref 'run-id launch-info)) + (test-id (alist-ref 'test-id launch-info)) + (work-area (alist-ref 'work-area launch-info)) + (fullcmd (alist-ref 'fullcmd launch-info)) + (launchwait (alist-ref 'launchwait launch-info)) + (useshell (alist-ref 'useshell launch-info)) + (launch-results-prev + (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success + (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) + (exit-code (if launchwait (cadr launch-results-prev) 0)) + ) + (if success + (tests:test-set-status! run-id test-id "LAUNCHED" "enqueued" #f #f) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (print "exit code => "exit-code) + + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launcher exit code: " exit-code) + success)))) + ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) @@ -1280,11 +1327,11 @@ (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) (thread-sleep! (- launch-delay delta)) (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) + ;;(change-directory *toppath*) (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) (append (list (list "MT_RUN_AREA_HOME" *toppath*) (list "MT_TEST_NAME" test-name) @@ -1433,50 +1480,31 @@ itemdat))) (testprevvals (alist->env-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) ;; Launchwait defaults to true, must override it to turn off wait (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd))))) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) + (launch-info (list + (cons 'run-id run-id) + (cons 'test-id test-id) + (cons 'work-area work-area) + (cons 'fullcmd fullcmd) + (cons 'launchwait launchwait) + (cons 'useshell useshell))) + (launch-dat-file (conc work-area "/launch.dat")) + (write-result (with-output-to-file launch-dat-file + (lambda () (pp launch-info)))) + (launch-cmd (conc "megatest -start-dir "*toppath*" -internal-launch-test "work-area" &")) + ) + + (system launch-cmd) + + (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*))) + (change-directory *toppath*) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + write-result)))) ;; recover a test where the top controlling mtest may have died ;; (define (launch:recover-test run-id test-id) ;; this function is called on the test run host via ssh Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -277,10 +277,11 @@ "-m" "-rerun" "-days" "-rename-run" "-to" + "-internal-launch-test" ;; values and messages ":category" ":variable" ":value" ":expected" @@ -1688,10 +1689,21 @@ (lambda (target runname keys keyvals) (runs:rollup-run keys keyvals (or (args:get-arg "-runname")(args:get-arg ":runname") ) user)))) + +;;====================================================================== +;; launch test in separate call; takes test run dir as argument. +;;====================================================================== +(if (args:get-arg "-internal-launch-test") + (let ((toppath (launch:setup))) + (launch-test-standalone (args:get-arg "-internal-launch-test")) + (set! *didsomething* #t))) + + + ;;====================================================================== ;; Lock or unlock a run ;;====================================================================== Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -13,11 +13,10 @@ ;; Process convience utils ;;====================================================================== (use regex) (declare (unit process)) -;;(declare (uses common)) (define (process:conservative-read port) (let loop ((res "")) (if (not (eof-object? (peek-char port))) (loop (conc res (read-char port))) @@ -45,10 +44,36 @@ (begin (close-input-port fh) (close-input-port fhe) (close-output-port fho) result))))) ;; ) + +(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + ;(close-input-port fh) + ;(close-input-port fhe) + ;(close-output-port fho) + (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) + (list result (if normalexit? exitstatus -1)))))))) (define (process:cmd-run-proc-each-line cmd proc . params) ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) (handle-exceptions exn