Index: TODO ================================================================== --- TODO +++ TODO @@ -42,10 +42,11 @@ . break command line into sections; all, run control, queries, utilities etc. . pull in ftfplan (not integrated, just code pulled in) WW20 . ./configure => ubuntu, sles11, sles12, rh7 +. Jenkins junit XML support . Add output flushing in teamcity support . Switch to using simple runs query everywhere . Add end_time to runs and add a rollup call that sets state, status and end_time Future Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -720,11 +720,10 @@ (if got-all (begin (dboard:rundat-last-update-set! run-dat (- start-time 2)) (dboard:rundat-run-data-offset-set! run-dat 0)) (begin - ;;; (thread-sleep! 0.25) ;; give the rest of the gui some time to update. <-- this did NOT help (dboard:rundat-run-data-offset-set! run-dat (+ num-to-get (dboard:rundat-run-data-offset run-dat))))) (for-each (lambda (tdat) @@ -838,13 +837,11 @@ ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; (define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts) - (dboard:tabdat-already-running-set! tabdat #t) - (let* (;; (already-running (dboard:tabdat-already-running tabdat)) - (access-mode (dboard:tabdat-access-mode tabdat)) + (let* ((access-mode (dboard:tabdat-access-mode tabdat)) (keys (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys))) (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2)) (allruns (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f)) (allruns-tree (rmt:get-runs-by-patt keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname") @@ -3727,17 +3724,10 @@ ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) - ;; this seems like a good place to check for already running and skip if so - ;; - ;; (set! *updater-running* #t) -;;(if (dboard:tabdat-already-running tabdat) -;; (begin -;; (debug:print-info 0 *default-log-port* "Dashboard overloaded - updates will be slow, skipping update.") -;; (dboard:tabdat-target tabdat)) (dboard:update-rundat tabdat (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") (dboard:tabdat-numruns tabdat) (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -462,13 +462,14 @@ (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. - ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun - (and (equal? (db:test-get-state test-info) "COMPLETED") ;; completed/abort => rerun if asked - (member (db:test-get-status test-info) '("ABORT")))) + ;; ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun + ;; (and (equal? (db:test-get-state test-info) "COMPLETED") ;; completed/abort => rerun if asked + ;; (member (db:test-get-status test-info) '("ABORT")))) + ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -1783,17 +1783,14 @@ ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") - (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) - (if (and (list? can-run-more) - (car can-run-more)) ;; itemized test expanded here - (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup - max-concurrent-jobs run-id waitons item-path - testmode test-record can-run-more items runname - tconfig reglen test-registry test-records itemmaps))) + (let ((can-run-more #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) + (if (not can-run-more) #;(and (list? can-run-more) + (car can-run-more)) + (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) Index: server.scm ================================================================== --- server.scm +++ server.scm @@ -319,13 +319,13 @@ (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) -;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails +;; wait for server=start-last to be three seconds old ;; -#;(define (server:wait-for-server-start-last-flag areapath) +(define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (reftime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) (if (file-exists? start-flag) @@ -347,13 +347,13 @@ (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! reftime) (server:wait-for-server-start-last-flag areapath))))))) -;; wait for server=start-last to be three seconds old +;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails ;; -(define (server:wait-for-server-start-last-flag areapath) +#;(define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (reftime (+ 2 (random 3))) (delta (- (current-seconds) fmodtime)) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1987,10 +1987,32 @@ (tmpfree (get-df "/tmp")) (uname (get-uname "-srvpio")) (hostname (get-host-name))) (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree))) +;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) +#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) + (let* ((cpuload (get-cpu-load)) + (diskfree (get-df (current-directory))) + (remtries 10)) + (handle-exceptions + exn + (if (> remtries 0) + (begin + (print-call-chain (current-error-port)) + (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") + (set! remtries (- remtries 1)) + (thread-sleep! 10) + (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) + (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) + (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") + (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") + (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) + (debug:print 5 *default-log-port* "exn=" (condition->list exn)) + (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) + (print-call-chain (current-error-port)))) + (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)))) ;;====================================================================== ;; A R C H I V I N G ;;======================================================================