ADDED launch-test.scm Index: launch-test.scm ================================================================== --- /dev/null +++ launch-test.scm @@ -0,0 +1,255 @@ +;; +(define (launch-test-standalone test-work-dir) + (when (not (directory-exists? test-work-dir)) + (debug:print-error 0 *default-log-port* "Cannot launch. test-work-dir for lauched test does not exist, cannot proceed with launch: "test-work-dir) + (exit 1)) + (change-directory test-work-dir) + (let* ((launch-dat-file (conc test-run-dir "/launch.dat"))) + (if (not (common:file-exists? launch-dat-file)) + ;; error and exit + #f + (let* ((launch-info (with-input-from-file launch-dat-file read)) + (run-id (alist-ref 'run-id launch-info)) + (test-id (alist-ref 'test-id launch-info)) + (work-area (alist-ref 'work-area launch-info)) + (fullcmd (alist-ref 'fullcmd launch-info)) + (launchwait (alist-ref 'launchwait launch-info)) + (launch-results-prev + (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. + process:cmd-run-with-stderr-and-exitcode->list + process-run) + (if useshell + (let ((cmdstr (string-intersperse fullcmd " "))) + (if launchwait + cmdstr + (conc cmdstr " >> mt_launch.log 2>&1 &"))) + (car fullcmd)) + (if useshell + '() + (cdr fullcmd)))) + (success + (if launchwait (equal? 0 (cadr launch-results-prev)) #t))) + (if success + (tests:test-set-status! run-id test-id "LAUNCHED" "enqueued" #f #f) + (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) + (with-output-to-file "mt_launch.log" + (lambda () + (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) + (if (list? launch-results) + (apply print launch-results) + (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) + #:append)) + (debug:print 2 *default-log-port* "Launching completed, updating db") + (debug:print 2 *default-log-port* "Launch results: " launch-results) + + + + + + success)))) + +;; 1. look though disks list for disk with most space +;; 2. create run dir on disk, path name is meaningful +;; 3. create link from run dir to megatest runs area +;; 4. remotely run the test on allocated host +;; - could be ssh to host from hosts table (update regularly with load) +;; - could be netbatch +;; (launch-test db (cadr status) test-conf)) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) + (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex + (let* ( ;; (lock-key (conc "test-" test-id)) + ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) + ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds + ;; (if (car lock) + ;; #t + ;; (if (> (current-seconds) expire-time) + ;; (begin + ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) + ;; (rmt:no-sync-del! lock-key) ;; destroy the lock + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; + ;; (begin + ;; (thread-sleep! 1) + ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) + (item-path (item-list->path itemdat)) + (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) + (let loop ((delta (- (current-seconds) *last-launch*)) + (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) + (if (> launch-delay delta) + (begin + (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. + (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) + (thread-sleep! (- launch-delay delta)) + (loop (- (current-seconds) *last-launch*) launch-delay)))) + ;;(change-directory *toppath*) + (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) + (append + (list + (list "MT_RUN_AREA_HOME" *toppath*) + (list "MT_TEST_NAME" test-name) + (list "MT_RUNNAME" runname) + (list "MT_ITEMPATH" item-path) + (list "MT_CONTOUR" contour) + ) + itemdat)) + (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed + ;; for tconfig, why do we allow fallback to test-conf? + (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) + (begin + (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") + test-conf))) ;; force re-read now that all vars are set + (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) + (if ush + (if (equal? ush "no") ;; must use "no" to NOT use shell + #f + ush) + #t))) ;; default is yes + (runscript (config-lookup tconfig "setup" "runscript")) + (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big + ;; (diskspace (config-lookup tconfig "requirements" "diskspace")) + ;; (memory (config-lookup tconfig "requirements" "memory")) + ;; (hosts (config-lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed + (remote-megatest (config-lookup *configdat* "setup" "executable")) + (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") + (configf:lookup *configdat* "setup" "runtimelim"))) + ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to + ;; allow running from dashboard. Extract the path + ;; from the called megatest and convert dashboard + ;; or dboard to megatest + (local-megatest (let* ((lm (car (argv))) + (dir (pathname-directory lm)) + (exe (pathname-strip-directory lm))) + (conc (if dir (conc dir "/") "") + (case (string->symbol exe) + ((dboard) "../megatest") + ((mtest) "../megatest") + ((dashboard) "megatest") + (else exe))))) + (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) + (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path + (work-area #f) + (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all + (diskpath #f) + (cmdparms #f) + (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) + (mt-bindir-path #f) + (testinfo (rmt:get-test-info-by-id run-id test-id)) + (mt_target (string-intersperse (map cadr keyvals) "/")) + (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) + (if (args:get-arg "-logging")(list "-logging") '())))) + ;; (if hosts (set! hosts (string-split hosts))) + ;; set the megatest to be called on the remote host + (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) + (set! mt-bindir-path (pathname-directory remote-megatest)) + (if launcher (set! launcher (string-split launcher))) + ;; set up the run work area for this test + (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run + (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir + (begin + (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) + (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record + + ;; prevent overlapping actions - set to LAUNCHED as early as possible + ;; + ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail + (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) + (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) + ;; (pp (hash-table->alist tconfig)) + (set! diskpath (get-best-disk *configdat* tconfig)) + (if diskpath + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) + (set! work-area (car dat)) + (set! toptest-work-area (cadr dat)) + (debug:print-info 2 *default-log-port* "Using work area " work-area)) + (begin + (set! work-area (conc test-path "/tmp_run")) + (create-directory work-area #t) + (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) + (set! cmdparms (base64:base64-encode + (z3:encode-buffer + (with-output-to-string + (lambda () ;; (list 'hosts hosts) + (write (list (list 'testpath test-path) + ;; (list 'transport (conc *transport-type*)) + ;; (list 'serverinf *server-info*) + (list 'homehost (let* ((hhdat (common:get-homehost))) + (if hhdat + (car hhdat) + #f))) + (list 'serverurl (if *runremote* + (remote-server-url *runremote*) + #f)) ;; + (list 'areaname (common:get-testsuite-name)) + (list 'toppath *toppath*) + (list 'work-area work-area) + (list 'test-name test-name) + (list 'runscript runscript) + (list 'run-id run-id ) + (list 'test-id test-id ) + ;; (list 'item-path item-path ) + (list 'itemdat itemdat ) + (list 'megatest remote-megatest) + (list 'ezsteps ezsteps) + (list 'target mt_target) + (list 'contour contour) + (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) + (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) + (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) + (list 'runname runname) + (list 'mt-bindir-path mt-bindir-path)))))))) + + ;; clean out step records from previous run if they exist + ;; (rmt:delete-test-step-records run-id test-id) + ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway + (if (common:file-exists? work-area) + (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir + (cond + ;; ((and launcher hosts) ;; must be using ssh hostname + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) + (launcher + (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) + ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) + (else + (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) + (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) + ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) + (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) + (debug:print 1 *default-log-port* "Launching " work-area) + ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done + (debug:print 4 *default-log-port* "fullcmd: " fullcmd) + (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. + (let* ((commonprevvals (alist->env-vars + (hash-table-ref/default *configdat* "env-override" '()))) + (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" + (append (list (list "MT_TEST_RUN_DIR" work-area) + (list "MT_TEST_NAME" test-name) + (list "MT_ITEM_INFO" (conc itemdat)) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" mt_target) + (list "MT_ITEMPATH" item-path) + ) + itemdat))) + (testprevvals (alist->env-vars + (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) + ;; Launchwait defaults to true, must override it to turn off wait + (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) + (launch-info (list + (cons 'run-id run-id) + (cons 'test-id test-id) + (cons 'work-area work-area) + (cons 'fullcmd fullcmd) + (cons 'launchwait launchwait))) + (launch-dat-file (conc work-area "/launch.dat")) + (write-result (with-output-to-file launch-dat-file + (lambda () (pp launch-info)))) + (lauch-cmd (conc "megatest -start-dir "*toppath*" -internal-launch-test "work-area" &")) + ) + + (system launch-cmd) + + (alist->env-vars miscprevvals) + (alist->env-vars testprevvals) + (alist->env-vars commonprevvals) + (change-directory *toppath*) + (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. + write-result)))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -1248,236 +1248,11 @@ (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) ;; (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat remtries: (- remtries 1))) (list #f #f))))) -;; 1. look though disks list for disk with most space -;; 2. create run dir on disk, path name is meaningful -;; 3. create link from run dir to megatest runs area -;; 4. remotely run the test on allocated host -;; - could be ssh to host from hosts table (update regularly with load) -;; - could be netbatch -;; (launch-test db (cadr status) test-conf)) -(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) - (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex - (let* ( ;; (lock-key (conc "test-" test-id)) - ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) - ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds - ;; (if (car lock) - ;; #t - ;; (if (> (current-seconds) expire-time) - ;; (begin - ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) - ;; (rmt:no-sync-del! lock-key) ;; destroy the lock - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; - ;; (begin - ;; (thread-sleep! 1) - ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) - (item-path (item-list->path itemdat)) - (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) - (let loop ((delta (- (current-seconds) *last-launch*)) - (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) - (if (> launch-delay delta) - (begin - (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. - (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) - (thread-sleep! (- launch-delay delta)) - (loop (- (current-seconds) *last-launch*) launch-delay)))) - (change-directory *toppath*) - (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars) - (append - (list - (list "MT_RUN_AREA_HOME" *toppath*) - (list "MT_TEST_NAME" test-name) - (list "MT_RUNNAME" runname) - (list "MT_ITEMPATH" item-path) - (list "MT_CONTOUR" contour) - ) - itemdat)) - (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed - ;; for tconfig, why do we allow fallback to test-conf? - (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) - (begin - (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") - test-conf))) ;; force re-read now that all vars are set - (useshell (let ((ush (config-lookup *configdat* "jobtools" "useshell"))) - (if ush - (if (equal? ush "no") ;; must use "no" to NOT use shell - #f - ush) - #t))) ;; default is yes - (runscript (config-lookup tconfig "setup" "runscript")) - (ezsteps (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big - ;; (diskspace (config-lookup tconfig "requirements" "diskspace")) - ;; (memory (config-lookup tconfig "requirements" "memory")) - ;; (hosts (config-lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed - (remote-megatest (config-lookup *configdat* "setup" "executable")) - (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") - (configf:lookup *configdat* "setup" "runtimelim"))) - ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to - ;; allow running from dashboard. Extract the path - ;; from the called megatest and convert dashboard - ;; or dboard to megatest - (local-megatest (let* ((lm (car (argv))) - (dir (pathname-directory lm)) - (exe (pathname-strip-directory lm))) - (conc (if dir (conc dir "/") "") - (case (string->symbol exe) - ((dboard) "../megatest") - ((mtest) "../megatest") - ((dashboard) "megatest") - (else exe))))) - (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) - (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path - (work-area #f) - (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all - (diskpath #f) - (cmdparms #f) - (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) - (mt-bindir-path #f) - (testinfo (rmt:get-test-info-by-id run-id test-id)) - (mt_target (string-intersperse (map cadr keyvals) "/")) - (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) - (if (args:get-arg "-logging")(list "-logging") '())))) - ;; (if hosts (set! hosts (string-split hosts))) - ;; set the megatest to be called on the remote host - (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) - (set! mt-bindir-path (pathname-directory remote-megatest)) - (if launcher (set! launcher (string-split launcher))) - ;; set up the run work area for this test - (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run - (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir - (begin - (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) - (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record - - ;; prevent overlapping actions - set to LAUNCHED as early as possible - ;; - ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail - (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) - (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) - ;; (pp (hash-table->alist tconfig)) - (set! diskpath (get-best-disk *configdat* tconfig)) - (if diskpath - (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) - (set! work-area (car dat)) - (set! toptest-work-area (cadr dat)) - (debug:print-info 2 *default-log-port* "Using work area " work-area)) - (begin - (set! work-area (conc test-path "/tmp_run")) - (create-directory work-area #t) - (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run"))) - (set! cmdparms (base64:base64-encode - (z3:encode-buffer - (with-output-to-string - (lambda () ;; (list 'hosts hosts) - (write (list (list 'testpath test-path) - ;; (list 'transport (conc *transport-type*)) - ;; (list 'serverinf *server-info*) - (list 'homehost (let* ((hhdat (common:get-homehost))) - (if hhdat - (car hhdat) - #f))) - (list 'serverurl (if *runremote* - (remote-server-url *runremote*) - #f)) ;; - (list 'areaname (common:get-testsuite-name)) - (list 'toppath *toppath*) - (list 'work-area work-area) - (list 'test-name test-name) - (list 'runscript runscript) - (list 'run-id run-id ) - (list 'test-id test-id ) - ;; (list 'item-path item-path ) - (list 'itemdat itemdat ) - (list 'megatest remote-megatest) - (list 'ezsteps ezsteps) - (list 'target mt_target) - (list 'contour contour) - (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) - (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) - (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) - (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path)))))))) - - ;; clean out step records from previous run if they exist - ;; (rmt:delete-test-step-records run-id test-id) - ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway - (if (common:file-exists? work-area) - (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir - (cond - ;; ((and launcher hosts) ;; must be using ssh hostname - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) - (launcher - (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) - ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) - (else - (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) - (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) - ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) - (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) - (debug:print 1 *default-log-port* "Launching " work-area) - ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done - (debug:print 4 *default-log-port* "fullcmd: " fullcmd) - (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. - (let* ((commonprevvals (alist->env-vars - (hash-table-ref/default *configdat* "env-override" '()))) - (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" - (append (list (list "MT_TEST_RUN_DIR" work-area) - (list "MT_TEST_NAME" test-name) - (list "MT_ITEM_INFO" (conc itemdat)) - (list "MT_RUNNAME" runname) - (list "MT_TARGET" mt_target) - (list "MT_ITEMPATH" item-path) - ) - itemdat))) - (testprevvals (alist->env-vars - (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))) - ;; Launchwait defaults to true, must override it to turn off wait - (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) - (launch-results (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. - process:cmd-run-with-stderr->list - process-run) - (if useshell - (let ((cmdstr (string-intersperse fullcmd " "))) - (if launchwait - cmdstr - (conc cmdstr " >> mt_launch.log 2>&1 &"))) - (car fullcmd)) - (if useshell - '() - (cdr fullcmd))))) - (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. - ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test - (if (not launchwait) ;; give the OS a little time to allow the process to start - (thread-sleep! 0.01)) - (with-output-to-file "mt_launch.log" - (lambda () - (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) - (if (list? launch-results) - (apply print launch-results) - (print "NOTE: launched \"" fullcmd "\"\n but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n if you have problems with this")) - #:append)) - (debug:print 2 *default-log-port* "Launching completed, updating db") - (debug:print 2 *default-log-port* "Launch results: " launch-results) - (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) - (alist->env-vars miscprevvals) - (alist->env-vars testprevvals) - (alist->env-vars commonprevvals) - launch-results)) - (change-directory *toppath*))) - +(include "launch-test.scm") ;; recover a test where the top controlling mtest may have died ;; (define (launch:recover-test run-id test-id) ;; this function is called on the test run host via ssh ;; Index: process.scm ================================================================== --- process.scm +++ process.scm @@ -13,11 +13,10 @@ ;; Process convience utils ;;====================================================================== (use regex) (declare (unit process)) -;;(declare (uses common)) (define (process:conservative-read port) (let loop ((res "")) (if (not (eof-object? (peek-char port))) (loop (conc res (read-char port))) @@ -45,10 +44,36 @@ (begin (close-input-port fh) (close-input-port fhe) (close-output-port fho) result))))) ;; ) + +(define (process:cmd-run-with-stderr-and-exitcode->list cmd . params) + ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) +;; (handle-exceptions +;; exn +;; (begin +;; (print "ERROR: Failed to run command: " cmd " " (string-intersperse params " ")) +;; (print " " ((condition-property-accessor 'exn 'message) exn)) +;; #f) + (let-values (((fh fho pid fhe) (if (null? params) + (process* cmd) + (process* cmd params)))) + (let loop ((curr (read-line fh)) + (result '())) + (let ((errstr (process:conservative-read fhe))) + (if (not (string=? errstr "")) + (set! result (append result (list errstr))))) + (if (not (eof-object? curr)) + (loop (read-line fh) + (append result (list curr))) + (begin + ;(close-input-port fh) + ;(close-input-port fhe) + ;(close-output-port fho) + (let-values (((anotherpid normalexit? exitstatus) (process-wait pid))) + (list result (if normalexit? exitstatus -1)))))))) (define (process:cmd-run-proc-each-line cmd proc . params) ;; (print "Called with cmd=" cmd ", proc=" proc ", params=" params) (handle-exceptions exn