Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -570,11 +570,11 @@ (let* ((fullname (conc "logs/" file))) (if (directory? fullname) (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.") (handle-exceptions exn - (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn) + (debug:print-info 0 *default-log-port* "failed to remove " fullname ", exn=" exn) (delete-file* fullname))))) files) (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files.")))))) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified @@ -1315,25 +1315,32 @@ (debug:print-info 0 *default-log-port* "using testpatt " args-testpatt " rtestpatt:" rtestpatt) args-testpatt)))) -(define (common:false-on-exception thunk #!key (message #f)) - (handle-exceptions exn +;; +(define (common:false-on-exception thunk #!key (message #f)(tries 1)) + (handle-exceptions + exn (begin (if message - (debug:print-info 0 *default-log-port* message)) - #f) (thunk) )) - -(define (common:file-exists? path-string #!key (silent #f)) - ;; this avoids stack dumps in the case where - - ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... - (common:false-on-exception (lambda () (file-exists? path-string)) + (debug:print-info 0 *default-log-port* message " exn=" exn)) + (if (> tries 1) + (begin + (thread-sleep! 1) + (common:false-on-exception thunk message: message tries: (- tries 1))) + #f)) + (thunk))) + +(define (common:file-exists? path-string #!key (silent #f)(tries 1)) + ;; this avoids stack dumps in the case where NFS is slow or flakey + (common:false-on-exception + (lambda ()(file-exists? path-string)) message: (if (not silent) (conc "Unable to access path: " path-string) #f) + tries: tries )) (define (common:directory-exists? path-string) ;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q... (common:false-on-exception (lambda () (directory-exists? path-string)) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -62,11 +62,11 @@ (defstruct runs:testdat hed tal reg reruns test-record test-name item-path jobgroup waitons testmode newtal itemmaps - (prereqs-not-met '()) + (prereqs-not-met #f) (last-update 0) ;; ) ;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files ;; - remove any that are over 3600 seconds old @@ -833,49 +833,41 @@ ;; tal - list of never visited tests ;; prefer next hed to be from reg than tal. (define runs:nothing-left-in-queue-count 0) -(define (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps) +;; cache the result of get-prereqs-not-met and don't call it if called in past 10 seconds +;; NOTE: This is assuming that testdat is highly specific to this test +;; +(define (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) + ;; mode: testmode itemmaps: itemmaps) (if (and (runs:testdat-prereqs-not-met testdat) - (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds - (runs:testdat-prereqs-not-met testdat) - (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps))) - (if (list? res) - res - (begin - (debug:print 0 *default-log-port* - "ERROR: rmt:get-prereqs-not-met returned non-list!\n" - " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps) - '()))))) + (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;;; only refresh for this test if + ;;; it has been at least 10 seconds + (runs:testdat-prereqs-not-met testdat) ;; return the cached result + (let* ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps))) (runs:testdat-prereqs-not-met-set! testdat res) (runs:testdat-last-update-set! testdat (current-seconds)) res))) - + ;;====================================================================== ;; runs:expand-items is called by runs:run-tests-queue ;;====================================================================== ;; ;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature: ;; (let loop ((hed (car sorted-test-names)) ;; (tal (cdr sorted-test-names)) ;; (reg '()) ;; registered, put these at the head of tal ;; (reruns '())) -(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record - can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat) +(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs + run-id waitons item-path testmode test-record + can-run-more items runname tconfig reglen test-registry + test-records itemmaps testdat) (let* ((loop-list (list hed tal reg reruns)) - (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps)) - #;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))) - (if (list? res) - res - (begin - (debug:print 0 *default-log-port* - "ERROR: rmt:get-prereqs-not-met returned non-list!\n" - " res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps) - '()))) + (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path + mode: testmode itemmaps: itemmaps)) (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait))))) - ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap)) (fails (runs:calc-fails prereqs-not-met)) (prereq-fails (runs:calc-prereq-fail prereqs-not-met)) (non-completed (runs:calc-not-completed prereqs-not-met)) (runnables (runs:calc-runnable prereqs-not-met)) (unexpanded-prereqs @@ -1811,13 +1803,13 @@ (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") (let ((can-run-more #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (not can-run-more) #;(and (list? can-run-more) ;; IDEA, this mechanism may have had some value, make it configurable to test pros/cons TODO (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs - max-concurrent-jobs run-id waitons item-path - testmode test-record can-run-more items runname - tconfig reglen test-registry test-records itemmaps))) + run-id waitons item-path testmode test-record + can-run-more items runname tconfig reglen test-registry + test-records itemmaps testdat))) (if loop-list (apply loop loop-list) (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns))))